/* Copyright (C) 2021 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#include "hwcdrv.h"

/*---------------------------------------------------------------------------*/
/* macros */

#define IS_GLOBAL /* Mark global symbols */

#include "cpuid.c" /* ftns for identifying a chip */

static hdrv_pcbe_api_t hdrv_pcbe_core_api;
static hdrv_pcbe_api_t hdrv_pcbe_opteron_api;
static hdrv_pcbe_api_t *hdrv_pcbe_drivers[] = {
  &hdrv_pcbe_core_api,
  &hdrv_pcbe_opteron_api,
  NULL
};

#include "opteron_pcbe.c"   /* CPU-specific code */
#include "core_pcbe.c"      /* CPU-specific code */

extern hwcdrv_api_t hwcdrv_pcl_api;

IS_GLOBAL hwcdrv_api_t *hwcdrv_drivers[] = {
  &hwcdrv_pcl_api,
  NULL
};

/*---------------------------------------------------------------------------*/
/* utils for drivers */

IS_GLOBAL int
hwcdrv_assign_all_regnos (Hwcentry* entries[], unsigned numctrs)
{
  unsigned int pmc_assigned[MAX_PICS];
  unsigned idx;
  for (int ii = 0; ii < MAX_PICS; ii++)
    pmc_assigned[ii] = 0;

  /* assign the HWCs that we already know about */
  for (idx = 0; idx < numctrs; idx++)
    {
      regno_t regno = entries[idx]->reg_num;
      if (regno == REGNO_ANY)
        {
          /* check to see if list of possible registers only contains one entry */
          regno = REG_LIST_SINGLE_VALID_ENTRY (entries[idx]->reg_list);
        }
      if (regno != REGNO_ANY)
        {
          if (regno < 0 || regno >= MAX_PICS
              || !regno_is_valid (entries[idx], regno))
            {
              logerr (GTXT ("For counter #%d, register %d is out of range\n"),
                      idx + 1, regno); /*!*/
              return HWCFUNCS_ERROR_HWCARGS;
            }
          TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): preselected: idx=%d, regno=%d\n",
                    idx, regno);
          entries[idx]->reg_num = regno; /* assigning back to entries */
          pmc_assigned[regno] = 1;
        }
    }

  /* assign HWCs that are currently REGNO_ANY */
  for (idx = 0; idx < numctrs; idx++)
    {
      if (entries[idx]->reg_num == REGNO_ANY)
        {
          int assigned = 0;
          regno_t *reg_list = entries[idx]->reg_list;
          for (; reg_list && *reg_list != REGNO_ANY; reg_list++)
            {
              regno_t regno = *reg_list;
              if (regno < 0 || regno >= MAX_PICS)
                {
                  logerr (GTXT ("For counter #%d, register %d is out of range\n"),
                          idx + 1, regno); /*!*/
                  return HWCFUNCS_ERROR_HWCARGS;
                }
              if (pmc_assigned[regno] == 0)
                {
                  TprintfT (DBG_LT2, "hwcfuncs_assign_regnos(): assigned: idx=%d, regno=%d\n",
                            idx, regno);
                  entries[idx]->reg_num = regno; /* assigning back to entries */
                  pmc_assigned[regno] = 1;
                  assigned = 1;
                  break;
                }
            }
          if (!assigned)
            {
              logerr (GTXT ("Counter '%s' could not be bound to a register\n"),
                      entries[idx]->name ?
entries[idx]->name : ""); return HWCFUNCS_ERROR_HWCARGS; } } } return 0; } IS_GLOBAL int hwcdrv_lookup_cpuver (const char * cpcN_cciname) { libcpc2_cpu_lookup_t *plookup; static libcpc2_cpu_lookup_t cpu_table[] = { LIBCPC2_CPU_LOOKUP_LIST }; if (cpcN_cciname == NULL) return CPUVER_UNDEFINED; /* search table for name */ for (plookup = cpu_table; plookup->cpc2_cciname; plookup++) { int n = strlen (plookup->cpc2_cciname); if (!strncmp (plookup->cpc2_cciname, cpcN_cciname, n)) return plookup->cpc2_cpuver; } /* unknown, but does have a descriptive string */ TprintfT (DBG_LT0, "hwcfuncs: CPC2: WARNING: Id of processor '%s' " "could not be determined\n", cpcN_cciname); return CPUVER_GENERIC; } /*---------------------------------------------------------------------------*/ /* utils to generate x86 register definitions on Linux */ /* * This code is structured as though we're going to initialize the * HWC by writing the Intel MSR register directly. That is, we * assume the lowest 16 bits of the event number will have the event * and that higher bits will set attributes. * * While SPARC is different, we can nonetheless use basically the * same "x86"-named functions: * * - The event code will still be 16 bits. It will still * be in the lowest 16 bits of the event number. Though * perf_event_code() on SPARC will expect those bits to * shifted, hwcdrv_pcl.c can easily perform that shift. * * - On SPARC we support only two attributes, "user" and "system", * which hwcdrv_pcl.c already converts to the "exclude_user" * and "exclude_kernel" fields expected by perf_event_open(). * "user" and "system" are stored in event bits 16 and 17. * For M8, a 4-bit mask of supported PICs is stored in bits [23:20]. */ IS_GLOBAL hwcdrv_get_eventnum_fn_t *hwcdrv_get_x86_eventnum = 0; static const attr_info_t perfctr_sparc_attrs[] = { {NTXT ("user"), 0, 0x01, 16}, //usr {NTXT ("system"), 0, 0x01, 17}, //os {NULL, 0, 0x00, 0}, }; static const attr_info_t perfctr_x64_attrs[] = {/* ok for Core2 & later */ {NTXT ("umask"), 0, 0xff, 8}, {NTXT ("user"), 0, 0x01, 16}, //usr //{NTXT("nouser"), 1, 0x01, 16}, //usr (inverted) {NTXT ("system"), 0, 0x01, 17}, //os {NTXT ("edge"), 0, 0x01, 18}, {NTXT ("pc"), 0, 0x01, 19}, {NTXT ("inv"), 0, 0x01, 23}, {NTXT ("cmask"), 0, 0xff, 24}, {NULL, 0, 0x00, 0}, }; const attr_info_t *perfctr_attrs_table = perfctr_x64_attrs; static const eventsel_t perfctr_evntsel_enable_bits = (0x01 << 16) | /* usr */ // (0xff << 0) | /* event*/ // (0xff << 8) | /* umask */ // (0x01 << 17) | /* os */ // (0x01 << 18) | /* edge */ // (0x01 << 19) | /* pc */ (0x01 << 20) | /* int */ // (0x01 << 21) | /* reserved */ (0x01 << 22) | /* enable */ // (0x01 << 23) | /* inv */ // (0xff << 24) | /* cmask */ 0; static int myperfctr_get_x86_eventnum (const char *eventname, uint_t pmc, eventsel_t *eventsel, eventsel_t *valid_umask, uint_t *pmc_sel) { if (hwcdrv_get_x86_eventnum && !hwcdrv_get_x86_eventnum (eventname, pmc, eventsel, valid_umask, pmc_sel)) return 0; /* check for numerically-specified counters */ char * endptr; uint64_t num = strtoull (eventname, &endptr, 0); if (*eventname && !*endptr) { *eventsel = EXTENDED_EVNUM_2_EVSEL (num); *valid_umask = 0xff; /* allow any umask (unused for SPARC?) 
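         A counter name that parses completely as a number (for example
         "0x12345", accepted by strtoull() with base 0 above) is taken as a
         raw event code: EXTENDED_EVNUM_2_EVSEL packs it into the eventsel
         encoding, and the 0xff umask here simply lets the caller apply any
         umask attribute later via set_x86_attr_bits().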
*/ *pmc_sel = pmc; return 0; } /* name does not specify a numeric value */ *eventsel = (eventsel_t) - 1; *valid_umask = 0x0; *pmc_sel = pmc; return -1; } static int mask_shift_set (eventsel_t *presult, eventsel_t invalue, eventsel_t mask, eventsel_t shift) { if (invalue & ~mask) return -1; /* invalue attempts to set bits outside of mask */ *presult &= ~(mask << shift); /* clear all the mask bits */ *presult |= (invalue << shift); /* set bits according to invalue */ return 0; } static int set_x86_attr_bits (eventsel_t *result_mask, eventsel_t evnt_valid_umask, hwcfuncs_attr_t attrs[], int nattrs, const char*nameOnly) { eventsel_t evntsel = *result_mask; for (int ii = 0; ii < (int) nattrs; ii++) { const char *attrname = attrs[ii].ca_name; eventsel_t attrval = (eventsel_t) attrs[ii].ca_val; const char *tmpname; int attr_found = 0; for (int jj = 0; (tmpname = perfctr_attrs_table[jj].attrname); jj++) { if (strcmp (attrname, tmpname) == 0) { if (strcmp (attrname, "umask") == 0) { if (attrval & ~evnt_valid_umask) { logerr (GTXT ("for `%s', allowable umask bits are: 0x%llx\n"), nameOnly, (long long) evnt_valid_umask); return -1; } } if (mask_shift_set (&evntsel, perfctr_attrs_table[jj].is_inverted ? (attrval^1) : attrval, perfctr_attrs_table[jj].mask, perfctr_attrs_table[jj].shift)) { logerr (GTXT ("`%s' attribute `%s' could not be set to 0x%llx\n"), nameOnly, attrname, (long long) attrval); return -1; } TprintfT (DBG_LT2, "hwcfuncs: Counter %s, attribute %s set to 0x%llx\n", nameOnly, attrname, (long long) attrval); attr_found = 1; break; } } if (!attr_found) { logerr (GTXT ("attribute `%s' is invalid\n"), attrname); return -1; } } *result_mask = evntsel; return 0; } IS_GLOBAL int hwcfuncs_get_x86_eventsel (unsigned int regno, const char *int_name, eventsel_t *return_event, uint_t *return_pmc_sel) { hwcfuncs_attr_t attrs[HWCFUNCS_MAX_ATTRS + 1]; unsigned nattrs = 0; char *nameOnly = NULL; eventsel_t evntsel = 0; // event number eventsel_t evnt_valid_umask = 0; uint_t pmc_sel = 0; int rc = -1; *return_event = 0; *return_pmc_sel = 0; void *attr_mem = hwcfuncs_parse_attrs (int_name, attrs, HWCFUNCS_MAX_ATTRS, &nattrs, NULL); if (!attr_mem) { logerr (GTXT ("out of memory, could not parse attributes\n")); return -1; } hwcfuncs_parse_ctr (int_name, NULL, &nameOnly, NULL, NULL, NULL); if (regno == REGNO_ANY) { logerr (GTXT ("reg# could not be determined for `%s'\n"), nameOnly); goto attr_wrapup; } /* look up evntsel */ if (myperfctr_get_x86_eventnum (nameOnly, regno, &evntsel, &evnt_valid_umask, &pmc_sel)) { logerr (GTXT ("counter `%s' is not valid\n"), nameOnly); goto attr_wrapup; } TprintfT (DBG_LT1, "hwcfuncs: event=0x%llx pmc=0x%x '%s' nattrs = %u\n", (long long) evntsel, pmc_sel, nameOnly, nattrs); /* determine event attributes */ eventsel_t evnt_attrs = perfctr_evntsel_enable_bits; if (set_x86_attr_bits (&evnt_attrs, evnt_valid_umask, attrs, nattrs, nameOnly)) goto attr_wrapup; if (evntsel & evnt_attrs) TprintfT (DBG_LT0, "hwcfuncs: ERROR - evntsel & enable bits overlap: 0x%llx 0x%llx 0x%llx\n", (long long) evntsel, (long long) evnt_attrs, (long long) (evntsel & evnt_attrs)); *return_event = evntsel | evnt_attrs; *return_pmc_sel = pmc_sel; rc = 0; attr_wrapup: free (attr_mem); free (nameOnly); return rc; } #ifdef __x86_64__ #define syscall_instr "syscall" #define syscall_clobber "rcx", "r11", "memory" #endif #ifdef __i386__ #define syscall_instr "int $0x80" #define syscall_clobber "memory" #endif static inline int perf_event_open (struct perf_event_attr *hw_event_uptr, pid_t pid, int cpu, int 
group_fd, unsigned long flags) { /* It seems that perf_event_open() sometimes fails spuriously, * even while an immediate retry succeeds. * So, let's try a few retries if the call fails just to be sure. */ int rc; for (int retry = 0; retry < 5; retry++) { rc = syscall (__NR_perf_event_open, hw_event_uptr, pid, cpu, group_fd, flags); if (rc != -1) return rc; } return rc; } /*---------------------------------------------------------------------------*/ /* macros & fwd prototypes */ #define HWCDRV_API static /* Mark functions used by hwcdrv API */ HWCDRV_API int hwcdrv_start (void); HWCDRV_API int hwcdrv_free_counters (); static pid_t hwcdrv_gettid (void) { #ifndef LIBCOLLECTOR_SRC return syscall (__NR_gettid); #elif defined(intel) pid_t r; __asm__ __volatile__(syscall_instr : "=a" (r) : "0" (__NR_gettid) : syscall_clobber); return r; #else return syscall (__NR_gettid); // FIXUP_XXX_SPARC_LINUX // write gettid in asm #endif } /*---------------------------------------------------------------------------*/ /* types */ #define NPAGES_PER_BUF 1 // number of pages to be used for perf_event samples // must be a power of 2 /*---------------------------------------------------------------------------*/ /* typedefs */ typedef struct { // event (hwc) definition unsigned int reg_num; // PMC assignment, potentially for detecting conflicts eventsel_t eventsel; // raw event bits (Intel/AMD) uint64_t counter_preload; // number of HWC events before signal struct perf_event_attr hw; // perf_event definition hrtime_t min_time; // minimum time we're targeting between events char *name; } perf_event_def_t; typedef struct { // runtime state of perf_event buffer void *buf; // pointer to mmapped buffer size_t pagesz; // size of pages } buffer_state_t; typedef struct { // runtime state of counter values uint64_t prev_ena_ts; // previous perf_event "enabled" time uint64_t prev_run_ts; // previous perf_event "running" time uint64_t prev_value; // previous HWC value } counter_value_state_t; typedef struct { // per-counter information perf_event_def_t *ev_def; // global HWC definition for one counter int fd; // perf_event fd buffer_state_t buf_state; // perf_event buffer's state counter_value_state_t value_state; // counter state int needs_restart; // workaround for dbx failure to preserve si_fd uint64_t last_overflow_period; hrtime_t last_overflow_time; } counter_state_t; typedef struct { // per-thread context counter_state_t *ctr_list; int signal_fd; // fd that caused the most recent signal pid_t tid; // for debugging signal delivery problems } hdrv_pcl_ctx_t; /*---------------------------------------------------------------------------*/ /* static variables */ static struct { int library_ok; int internal_open_called; hwcfuncs_tsd_get_fn_t find_vpc_ctx; unsigned hwcdef_cnt; /* number of *active* hardware counters */ hwcdrv_get_events_fn_t *get_events; } hdrv_pcl_state; static hwcdrv_about_t hdrv_pcl_about = {.cpcN_cpuver = CPUVER_UNDEFINED}; static perf_event_def_t global_perf_event_def[MAX_PICS]; #define COUNTERS_ENABLED() (hdrv_pcl_state.hwcdef_cnt) /* perf_event buffer formatting and handling */ static void reset_buf (buffer_state_t *bufstate) { TprintfT (0, "hwcdrv: ERROR: perf_event reset_buf() called!\n"); struct perf_event_mmap_page *metadata = bufstate->buf; if (metadata) metadata->data_tail = metadata->data_head; } static int skip_buf (buffer_state_t *bufstate, size_t sz) { TprintfT (DBG_LT1, "hwcdrv: WARNING: perf_event skip_buf called!\n"); struct perf_event_mmap_page *metadata = bufstate->buf; if (metadata 
== NULL) return -1; size_t pgsz = bufstate->pagesz; size_t bufsz = NPAGES_PER_BUF*pgsz; uint64_t d_tail = metadata->data_tail; uint64_t d_head = metadata->data_head; // validate request size if (sz > d_head - d_tail || sz >= bufsz) { reset_buf (bufstate); return -1; } metadata->data_tail = d_tail + sz; // advance tail return 0; } static int read_buf (buffer_state_t *bufstate, void *buf, size_t sz) { struct perf_event_mmap_page *metadata = bufstate->buf; if (metadata == NULL) return -1; size_t pgsz = bufstate->pagesz; size_t bufsz = NPAGES_PER_BUF*pgsz; uint64_t d_tail = metadata->data_tail; uint64_t d_head = metadata->data_head; // validate request size if (sz > d_head - d_tail || sz >= bufsz) { reset_buf (bufstate); return -1; } char *buf_base = ((char *) metadata) + pgsz; // start of data buffer uint64_t start_pos = d_tail & (bufsz - 1); // char offset into data buffer size_t nbytes = sz; if (start_pos + sz > bufsz) { // will wrap past end of buffer nbytes = bufsz - start_pos; memcpy (buf, buf_base + start_pos, nbytes); start_pos = 0; // wrap to start buf = (void *) (((char *) buf) + nbytes); nbytes = sz - nbytes; } memcpy (buf, buf_base + start_pos, nbytes); metadata->data_tail += sz; return 0; } static int read_u64 (buffer_state_t *bufstate, uint64_t *value) { return read_buf (bufstate, value, sizeof (uint64_t)); } static int read_sample (counter_state_t *ctr_state, int msgsz, uint64_t *rvalue, uint64_t *rlost) { // returns count of bytes read buffer_state_t *bufstate = &ctr_state->buf_state; counter_value_state_t *cntstate = &ctr_state->value_state; int readsz = 0; // PERF_SAMPLE_IP uint64_t ipc = 0; int rc = read_u64 (bufstate, &ipc); if (rc) return -1; readsz += sizeof (uint64_t); // PERF_SAMPLE_READ: value uint64_t value = 0; rc = read_u64 (bufstate, &value); if (rc) return -2; readsz += sizeof (uint64_t); /* Bug 20806896 * Old Linux kernels (e.g. 2.6.32) on certain systems return enabled and * running times in the sample data that correspond to the metadata times * metadata->time_enabled * metadata->time_running * from the PREVIOUS (not current) sample. Probably just ignore this bug * since it's on old kernels and we only use the enabled and running times * to construct loss_estimate. 
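 *
 * When running_delta is smaller than enabled_delta the kernel multiplexed
 * this counter with others; the code below scales the raw value_delta by
 * enabled_delta / running_delta to approximate the true count, and
 * loss_estimate is only a rough debugging aid rather than an exact tally
 * of dropped events.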
*/ // PERF_SAMPLE_READ: PERF_FORMAT_ENABLED uint64_t enabled_time = 0; rc = read_u64 (bufstate, &enabled_time); if (rc) return -3; readsz += sizeof (uint64_t); // PERF_SAMPLE_READ: PERF_FORMAT_RUNNING uint64_t running_time = 0; rc = read_u64 (bufstate, &running_time); if (rc) return -4; readsz += sizeof (uint64_t); uint64_t value_delta = value - cntstate->prev_value; uint64_t enabled_delta = enabled_time - cntstate->prev_ena_ts; uint64_t running_delta = running_time - cntstate->prev_run_ts; cntstate->prev_value = value; cntstate->prev_ena_ts = enabled_time; cntstate->prev_run_ts = running_time; // 24830461 need workaround for Linux anomalous HWC skid overrun int set_error_flag = 0; if (value_delta > 2 * ctr_state->last_overflow_period + 2000 /* HWC_SKID_TOLERANCE */) set_error_flag = 1; uint64_t loss_estimate = 0; // estimate loss of events caused by multiplexing if (running_delta == enabled_delta) { // counter was running 100% of time, no multiplexing } else if (running_delta == 0) loss_estimate = 1; // token amount to aid in debugging perfctr oddities else if ((running_delta > enabled_delta) || (enabled_delta & 0x1000000000000000ll)) { // running should be smaller than enabled, can't estimate /* * 21418391 HWC can have a negative count * * We've also seen enabled not only be smaller than running * but in fact go negative. Guard against this. */ loss_estimate = 2; // token amount to aid in debugging perfctr oddities } else { // counter was running less than 100% of time // Example: ena=7772268 run=6775669 raw_value=316004 scaled_value=362483 loss_est=46479 uint64_t scaled_delta = (double) value_delta * enabled_delta / running_delta; value_delta = scaled_delta; #if 0 // We should perhaps warn the user that multiplexing is going on, // but hwcdrv_pcl.c doesn't know about the collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_* values. // For now we simply don't report. // Perhaps we should address the issue not here but in the caller collector_sigemt_handler(), // but at that level "lost" has a meaning that's considerably broader than just multiplexing. collector_interface->writeLog ("%s %d -> %d\n", SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, ctr_list[idx].last_overflow_period, new_period); #endif } TprintfT ((loss_estimate || set_error_flag) ? DBG_LT1 : DBG_LT3, "hwcdrv: '%s' ipc=0x%llx ena=%llu run=%llu " "value_delta=%lld(0x%llx) loss_est=%llu %s error_flag='%s'\n", ctr_state->ev_def->name, (long long) ipc, (long long) enabled_delta, (long long) running_delta, (long long) value_delta, (long long) value_delta, (unsigned long long) loss_estimate, loss_estimate ? ", WARNING - SCALED" : "", set_error_flag ? 
", ERRORFLAG" : ""); if (set_error_flag == 1) value_delta |= (1ULL << 63) /* HWCVAL_ERR_FLAG */; *rvalue = value_delta; *rlost = loss_estimate; if (readsz != msgsz) { TprintfT (0, "hwcdrv: ERROR: perf_event sample not fully parsed\n"); return -5; } return 0; } static void dump_perf_event_attr (struct perf_event_attr *at) { TprintfT (DBG_LT2, "dump_perf_event_attr: size=%d type=%d sample_period=%lld\n" " config=0x%llx config1=0x%llx config2=0x%llx wakeup_events=%lld __reserved_1=%lld\n", (int) at->size, (int) at->type, (unsigned long long) at->sample_period, (unsigned long long) at->config, (unsigned long long) at->config1, (unsigned long long) at->config2, (unsigned long long) at->wakeup_events, (unsigned long long) at->__reserved_1); #define DUMP_F(fld) if (at->fld) TprintfT(DBG_LT2, " %-10s : %lld\n", #fld, (long long) at->fld) DUMP_F (disabled); DUMP_F (inherit); DUMP_F (pinned); DUMP_F (exclusive); DUMP_F (exclude_user); DUMP_F (exclude_kernel); DUMP_F (exclude_hv); DUMP_F (exclude_idle); // DUMP_F(xmmap); DUMP_F (comm); DUMP_F (freq); DUMP_F (inherit_stat); DUMP_F (enable_on_exec); DUMP_F (task); DUMP_F (watermark); } static void init_perf_event (struct perf_event_attr *hw, uint64_t event, uint64_t period) { memset (hw, 0, sizeof (struct perf_event_attr)); hw->size = sizeof (struct perf_event_attr); // fwd/bwd compat #if defined(__i386__) || defined(__x86_64) //note: Nehalem/Westmere OFFCORE_RESPONSE in upper 32 bits hw->config = event; hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... #elif defined(__aarch64__) hw->type = (event >> 24) & 7; hw->config = event & 0xff; #elif defined(sparc) //SPARC needs to be shifted up 16 bits hw->config = (event & 0xFFFF) << 16; // uint64_t event uint64_t regs = (event >> 20) & 0xf; // see sparc_pcbe.c hw->config |= regs << 4; // for M8, supported PICs need to be placed at bits [7:4] hw->type = PERF_TYPE_RAW; // hw/sw/trace/raw... #endif hw->sample_period = period; hw->sample_type = PERF_SAMPLE_IP | // PERF_SAMPLE_TID | // PERF_SAMPLE_TIME | // possibly interesting // PERF_SAMPLE_ADDR | PERF_SAMPLE_READ | // HWC value // PERF_SAMPLE_CALLCHAIN | // interesting // PERF_SAMPLE_ID | // PERF_SAMPLE_CPU | // possibly interesting // PERF_SAMPLE_PERIOD | // PERF_SAMPLE_STREAM_ID | // PERF_SAMPLE_RAW | 0; hw->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | // detect when hwc not scheduled PERF_FORMAT_TOTAL_TIME_RUNNING | // detect when hwc not scheduled // PERF_FORMAT_ID | // PERF_FORMAT_GROUP | 0; hw->disabled = 1; /* off by default */ // Note: the following override config.priv bits! 
hw->exclude_user = (event & (1 << 16)) == 0; /* don't count user */ hw->exclude_kernel = (event & (1 << 17)) == 0; /* ditto kernel */ hw->exclude_hv = 1; /* ditto hypervisor */ hw->wakeup_events = 1; /* wakeup every n events */ dump_perf_event_attr (hw); } static int start_one_ctr (int ii, size_t pgsz, hdrv_pcl_ctx_t * pctx, char *error_string) { // pe_attr should have been initialized in hwcdrv_create_counters() struct perf_event_attr pe_attr; memcpy (&pe_attr, &global_perf_event_def[ii].hw, sizeof (pe_attr)); // but we adjust the period, so make sure that pctx->ctr_list[ii].last_overflow_period has been set pe_attr.sample_period = pctx->ctr_list[ii].last_overflow_period; int hwc_fd = perf_event_open (&pe_attr, pctx->tid, -1, -1, 0); if (hwc_fd == -1) { TprintfT (DBG_LT1, "%s idx=%d perf_event_open failed, errno=%d\n", error_string, ii, errno); return 1; } size_t buffer_area_sz = (NPAGES_PER_BUF + 1) * pgsz; // add a page for metadata void * buf = mmap (NULL, buffer_area_sz, //YXXX is this a safe call? PROT_READ | PROT_WRITE, MAP_SHARED, hwc_fd, 0); if (buf == MAP_FAILED) { TprintfT (0, "sz = %ld, pgsz = %ld\n err=%s idx=%d mmap failed: %s\n", (long) buffer_area_sz, (long) pgsz, error_string, ii, strerror (errno)); return 1; } pctx->ctr_list[ii].ev_def = &global_perf_event_def[ii]; // why do we set ev_def? we never seem to use it pctx->ctr_list[ii].fd = hwc_fd; pctx->ctr_list[ii].buf_state.buf = buf; pctx->ctr_list[ii].buf_state.pagesz = pgsz; pctx->ctr_list[ii].value_state.prev_ena_ts = 0; pctx->ctr_list[ii].value_state.prev_run_ts = 0; pctx->ctr_list[ii].value_state.prev_value = 0; pctx->ctr_list[ii].last_overflow_time = gethrtime (); /* set async mode */ long flags = fcntl (hwc_fd, F_GETFL, 0) | O_ASYNC; int rc = fcntl (hwc_fd, F_SETFL, flags); if (rc == -1) { TprintfT (0, "%s idx=%d O_ASYNC failed\n", error_string, ii); return 1; } /* * set lwp ownership of the fd * See BUGS section of "man perf_event_open": * The F_SETOWN_EX option to fcntl(2) is needed to properly get * overflow signals in threads. This was introduced in Linux 2.6.32. * Legacy references: * see http://lkml.org/lkml/2009/8/4/128 * google man fcntl F_SETOWN_EX -conflict * "From Linux 2.6.32 onward, use F_SETOWN_EX to target * SIGIO and SIGURG signals at a particular thread." * http://icl.cs.utk.edu/papi/docs/da/d2a/examples__v2_8x_2self__smpl__multi_8c.html * See 2010 CSCADS presentation by Eranian */ struct f_owner_ex fowner_ex; fowner_ex.type = F_OWNER_TID; fowner_ex.pid = pctx->tid; rc = fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex); if (rc == -1) { TprintfT (0, "%s idx=%d F_SETOWN failed\n", error_string, ii); return 1; } /* Use sigio so handler can determine FD via siginfo->si_fd. 
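   * With F_SETSIG below, an overflow is delivered as SIGIO carrying
   * si_fd == hwc_fd and, in one-shot PERF_EVENT_IOC_REFRESH mode,
   * si_code == POLL_HUP; hwcdrv_overflow() relies on both to locate the
   * counter in ctr_list[].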
*/ rc = fcntl (hwc_fd, F_SETSIG, SIGIO); if (rc == -1) { TprintfT (0, "%s idx=%d F_SETSIG failed\n", error_string, ii); return 1; } return 0; } static int stop_one_ctr (int ii, counter_state_t *ctr_list) { int hwc_rc = 0; if (-1 == ioctl (ctr_list[ii].fd, PERF_EVENT_IOC_DISABLE, 1)) { TprintfT (0, "hwcdrv: ERROR: PERF_EVENT_IOC_DISABLE #%d failed: errno=%d\n", ii, errno); hwc_rc = HWCFUNCS_ERROR_GENERIC; } void *buf = ctr_list[ii].buf_state.buf; if (buf) { size_t bufsz = (NPAGES_PER_BUF + 1) * ctr_list[ii].buf_state.pagesz; ctr_list[ii].buf_state.buf = NULL; int tmprc = munmap (buf, bufsz); if (tmprc) { TprintfT (0, "hwcdrv: ERROR: munmap() #%d failed: errno=%d\n", ii, errno); hwc_rc = HWCFUNCS_ERROR_GENERIC; } } if (-1 == close (ctr_list[ii].fd)) { TprintfT (0, "hwcdrv: ERROR: close(fd) #%d failed: errno=%d\n", ii, errno); hwc_rc = HWCFUNCS_ERROR_GENERIC; } return hwc_rc; } /* HWCDRV_API for thread-specific actions */ HWCDRV_API int hwcdrv_lwp_init (void) { return hwcdrv_start (); } HWCDRV_API void hwcdrv_lwp_fini (void) { hwcdrv_free_counters (); /* also sets pctx->ctr_list=NULL; */ } /* open */ static int hdrv_pcl_internal_open () { if (hdrv_pcl_state.internal_open_called) { TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open: already called\n"); return HWCFUNCS_ERROR_ALREADY_CALLED; } // determine if PCL is available perf_event_def_t tmp_event_def; memset (&tmp_event_def, 0, sizeof (tmp_event_def)); struct perf_event_attr *pe_attr = &tmp_event_def.hw; init_perf_event (pe_attr, 0, 0); pe_attr->type = PERF_TYPE_HARDWARE; // specify abstracted HW event pe_attr->config = PERF_COUNT_HW_INSTRUCTIONS; // specify abstracted insts int hwc_fd = perf_event_open (pe_attr, 0, // pid/tid, 0 is self -1, // cpu, -1 is per-thread mode -1, // group_fd, -1 is root 0); // flags if (hwc_fd == -1) { TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open:" " perf_event_open() failed, errno=%d\n", errno); goto internal_open_error; } /* see if the PCL is new enough to know about F_SETOWN_EX */ struct f_owner_ex fowner_ex; fowner_ex.type = F_OWNER_TID; fowner_ex.pid = hwcdrv_gettid (); // "pid=tid" is correct w/F_OWNER_TID if (fcntl (hwc_fd, F_SETOWN_EX, (unsigned long) &fowner_ex) == -1) { TprintfT (DBG_LT1, "hwcdrv: WARNING: hdrv_pcl_internal_open: " "F_SETOWN failed, errno=%d\n", errno); close (hwc_fd); goto internal_open_error; } close (hwc_fd); hdrv_pcl_state.internal_open_called = 1; hdrv_pcl_state.library_ok = 1; // set to non-zero to show it's initted hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; TprintfT (DBG_LT2, "hwcdrv: hdrv_pcl_internal_open()\n"); for (int ii = 0; hdrv_pcbe_drivers[ii]; ii++) { hdrv_pcbe_api_t *ppcbe = hdrv_pcbe_drivers[ii]; if (!ppcbe->hdrv_pcbe_init ()) { hdrv_pcl_about.cpcN_cciname = ppcbe->hdrv_pcbe_impl_name (); hdrv_pcl_about.cpcN_cpuver = hwcdrv_lookup_cpuver (hdrv_pcl_about.cpcN_cciname); if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) goto internal_open_error; hdrv_pcl_about.cpcN_npics = ppcbe->hdrv_pcbe_ncounters (); hdrv_pcl_about.cpcN_docref = ppcbe->hdrv_pcbe_cpuref (); hdrv_pcl_state.get_events = ppcbe->hdrv_pcbe_get_events; hwcdrv_get_x86_eventnum = ppcbe->hdrv_pcbe_get_eventnum; break; } } if (hdrv_pcl_about.cpcN_npics > MAX_PICS) { TprintfT (0, "hwcdrv: WARNING: hdrv_pcl_internal_open:" " reducing number of HWCs from %u to %u on processor '%s'\n", hdrv_pcl_about.cpcN_npics, MAX_PICS, hdrv_pcl_about.cpcN_cciname); hdrv_pcl_about.cpcN_npics = MAX_PICS; } TprintfT (DBG_LT1, "hwcdrv: hdrv_pcl_internal_open:" " perf_event cpuver=%d, name='%s'\n", 
hdrv_pcl_about.cpcN_cpuver, hdrv_pcl_about.cpcN_cciname); return 0; internal_open_error: hdrv_pcl_about.cpcN_cpuver = CPUVER_UNDEFINED; hdrv_pcl_about.cpcN_npics = 0; hdrv_pcl_about.cpcN_docref = NULL; hdrv_pcl_about.cpcN_cciname = NULL; return HWCFUNCS_ERROR_NOT_SUPPORTED; } static void * single_thread_tsd_ftn () { static hdrv_pcl_ctx_t tsd_context; return &tsd_context; } /* HWCDRV_API */ HWCDRV_API int hwcdrv_init (hwcfuncs_abort_fn_t abort_ftn, int *tsd_sz) { hdrv_pcl_state.find_vpc_ctx = single_thread_tsd_ftn; if (tsd_sz) *tsd_sz = sizeof (hdrv_pcl_ctx_t); if (hdrv_pcl_state.internal_open_called) return HWCFUNCS_ERROR_ALREADY_CALLED; return hdrv_pcl_internal_open (); } HWCDRV_API void hwcdrv_get_info (int *cpuver, const char **cciname, uint_t *npics, const char **docref, uint64_t *support) { if (cpuver) *cpuver = hdrv_pcl_about.cpcN_cpuver; if (cciname) *cciname = hdrv_pcl_about.cpcN_cciname; if (npics) *npics = hdrv_pcl_about.cpcN_npics; if (docref) *docref = hdrv_pcl_about.cpcN_docref; if (support) *support = HWCFUNCS_SUPPORT_OVERFLOW_PROFILING | HWCFUNCS_SUPPORT_OVERFLOW_CTR_ID; } HWCDRV_API int hwcdrv_enable_mt (hwcfuncs_tsd_get_fn_t tsd_ftn) { if (tsd_ftn) hdrv_pcl_state.find_vpc_ctx = tsd_ftn; else { TprintfT (0, "hwcdrv: ERROR: enable_mt(): tsd_ftn==NULL\n"); return HWCFUNCS_ERROR_UNAVAIL; } return 0; } HWCDRV_API int hwcdrv_get_descriptions (hwcf_hwc_cb_t *hwc_cb, hwcf_attr_cb_t *attr_cb) { int count = 0; if (hwc_cb && hdrv_pcl_state.get_events) count = hdrv_pcl_state.get_events (hwc_cb); if (attr_cb) for (int ii = 0; perfctr_attrs_table && perfctr_attrs_table[ii].attrname; ii++) attr_cb (perfctr_attrs_table[ii].attrname); if (!count) return -1; return 0; } HWCDRV_API int hwcdrv_assign_regnos (Hwcentry* entries[], unsigned numctrs) { return hwcdrv_assign_all_regnos (entries, numctrs); } static int internal_hwc_start (int fd) { int rc = ioctl (fd, PERF_EVENT_IOC_REFRESH, 1); if (rc == -1) { TprintfT (DBG_LT0, "hwcdrv: ERROR: internal_hwc_start:" " PERF_EVENT_IOC_REFRESH(fd=%d) failed: errno=%d\n", fd, errno); return HWCFUNCS_ERROR_UNAVAIL; } TprintfT (DBG_LT3, "hwcdrv: internal_hwc_start(fd=%d)\n", fd); return 0; } HWCDRV_API int hwcdrv_overflow (siginfo_t *si, hwc_event_t *eventp, hwc_event_t *lost_events) { /* set expired counters to overflow value and all others to 0 */ /* return 0: OK, counters should be restarted */ /* return non-zero: eventp not set, counters should not be restarted */ /* clear return values */ int ii; for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) { eventp->ce_pic[ii] = 0; lost_events->ce_pic[ii] = 0; } hrtime_t sig_ts = gethrtime (); //YXXX get this from HWC event? eventp->ce_hrt = sig_ts; lost_events->ce_hrt = sig_ts; /* determine source signal */ int signal_fd = -1; switch (si->si_code) { case POLL_HUP: /* expected value from pcl */ /* According to Stephane Eranian: * "expect POLL_HUP instead of POLL_IN because we are * in one-shot mode (IOC_REFRESH)" */ signal_fd = si->si_fd; break; case SI_TKILL: /* event forwarded by tkill */ /* DBX can only forward SI_TKILL when it detects POLL_HUP * unfortunately, this means that si->si_fd has been lost... * We need to process the buffers, but we don't know the fd! */ TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" " SI_TKILL detected\n", sig_ts); break; default: // "sometimes we see a POLL_IN (1) with very high event rates," // according to eranian(?) 
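      /* Anything else (including POLL_IN) is treated as unexpected: returning
         non-zero means eventp is not considered set and the counters should
         not be restarted, per the contract described at the top of this
         function.  */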
TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" " unexpected si_code 0x%x\n", sig_ts, si->si_code); return HWCFUNCS_ERROR_GENERIC; } hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); if (!pctx) { TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" " tsd context is NULL\n", sig_ts); return HWCFUNCS_ERROR_UNEXPECTED; } counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; if (!ctr_list) { TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" " ctr_list is NULL\n", sig_ts); return HWCFUNCS_ERROR_UNEXPECTED; } /* clear needs_restart flag */ for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) ctr_list[ii].needs_restart = 0; /* attempt to identify the counter to read */ int signal_idx = -1; pctx->signal_fd = signal_fd; // save the signal provided by siginfo_t if (signal_fd != -1) { for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) { if (ctr_list[ii].fd == signal_fd) { signal_idx = ii; break; } } } if (signal_idx < 0) { TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" " pmc not determined!\n", sig_ts); lost_events->ce_pic[0] = 1; /* record a bogus value into experiment */ // note: bogus value may get overwritten in loop below } /* capture sample(s). In addition to signal_idx, check other counters. */ struct perf_event_header sheader; int idx; for (idx = 0; idx < hdrv_pcl_state.hwcdef_cnt; idx++) { int num_recs = 0; while (1) { /* check for samples */ struct perf_event_mmap_page *metadata = ctr_list[idx].buf_state.buf; if (metadata == NULL) break; // empty if (metadata->data_tail == metadata->data_head) break; // empty /* read header */ if (read_buf (&ctr_list[idx].buf_state, &sheader, sizeof (sheader))) break; num_recs++; /* check for PERF_RECORD_SAMPLE */ size_t datasz = sheader.size - sizeof (struct perf_event_header); if (sheader.type != PERF_RECORD_SAMPLE) { TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" " unexpected recd type=%d\n", sig_ts, sheader.type); if (skip_buf (&ctr_list[idx].buf_state, datasz)) { TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" " skip recd type=%d failed\n", sig_ts, sheader.type); lost_events->ce_pic[idx] = 4; /* record a bogus value */ break; // failed to skip buffer?? } lost_events->ce_pic[idx] = 2; /* record a bogus value */ continue; // advance to next record } /* type is PERF_RECORD_SAMPLE */ uint64_t value, lostv; if (read_sample (&ctr_list[idx], datasz, &value, &lostv)) { TprintfT (DBG_LT0, "hwcdrv: sig_ts=%llu: ERROR: hwcdrv_overflow:" " read_sample() failed\n", sig_ts); lost_events->ce_pic[idx] = 3; // record a bogus value break; // failed to read sample data?? 
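              /* If the sample could not be parsed, the ring-buffer position can
                 no longer be trusted, so stop draining this counter's buffer for
                 this signal rather than misreading the records that follow.  */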
} TprintfT (DBG_LT3, "hwcdrv: sig_ts=%llu: hwcdrv_overflow:" " idx=%d value=%llu lost=%llu\n", (unsigned long long) sig_ts, idx, (unsigned long long) value, (unsigned long long) lostv); if (eventp->ce_pic[idx]) { TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" " idx=%d previous sample recorded as lost_event\n", sig_ts, idx); lost_events->ce_pic[idx] += eventp->ce_pic[idx]; } eventp->ce_pic[idx] = value; lost_events->ce_pic[idx] += lostv; } /* debug output for unexpected (but common) cases */ if (idx == signal_idx) { if (num_recs != 1) TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" " %d records for signal_idx=%d\n", sig_ts, num_recs, signal_idx); } else if (num_recs) TprintfT (DBG_LT2, "hwcdrv: sig_ts=%llu: WARNING: hwcdrv_overflow:" " %d unexpected record(s) for idx=%d (signal_idx=%d)\n", sig_ts, num_recs, idx, signal_idx); /* trigger counter restart whenever records were found */ if (num_recs) { /* check whether to adapt the overflow interval */ /* This is the Linux version. * The Solaris version is in hwprofile.c collector_update_overflow_counters(). */ hrtime_t min_time = global_perf_event_def[idx].min_time; if (min_time > 0 // overflow interval is adaptive && sig_ts - ctr_list[idx].last_overflow_time < min_time) // last interval below min { /* pick a new overflow interval */ /* roughly doubled, but add funny numbers */ /* hopefully the result is prime or not a multiple of some # of ops/loop */ uint64_t new_period = 2 * ctr_list[idx].last_overflow_period + 37; #if 0 // On Solaris, we report the adjustment to the log file. // On Linux it's hard for us to do so since hwcdrv_pcl.c doesn't know about collector_interface, SP_JCMD_COMMENT, or COL_COMMENT_HWCADJ. // For now we simply don't report. collector_interface->writeLog ("%s %d -> %d\n", SP_JCMD_COMMENT, COL_COMMENT_HWCADJ, global_perf_event_def[idx].name, ctr_list[idx].last_overflow_period, new_period); #endif /* There are a variety of ways of resetting the period on Linux. * The most elegant is * ioctl(fd,PERF_EVENT_IOC_PERIOD,&period) * but check the perf_event_open man page for PERF_EVENT_IOC_PERIOD: * > Prior to Linux 2.6.36 this ioctl always failed due to a bug in the kernel. * > Prior to Linux 3.14 (or 3.7 on ARM), the new period did not take effect * until after the next overflow. * So we're kind of stuck shutting the fd down and restarting it with the new period. 
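           * Hence the fallback below: stop_one_ctr() disables, unmaps and
           * closes the event, last_overflow_period is updated, and
           * start_one_ctr() reopens it with the new sample_period.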
*/ if (stop_one_ctr (idx, ctr_list)) { // EUGENE figure out what to do on error } ctr_list[idx].last_overflow_period = new_period; if (start_one_ctr (idx, ctr_list[idx].buf_state.pagesz, pctx, "hwcdrv: ERROR: hwcdrv_overflow (readjust overflow):")) { // EUGENE figure out what to do on error } } ctr_list[idx].last_overflow_time = sig_ts; #if 0 ctr_list[idx].needs_restart = 1; #else // seems to be more reliable to restart here instead of hwcdrv_sighlr_restart() internal_hwc_start (ctr_list[idx].fd); #endif } } return 0; // OK to restart counters } HWCDRV_API int hwcdrv_sighlr_restart (const hwc_event_t *pp) { #if 0 // restarting here doesn't seem to work as well as restarting in hwcdrv_overflow() hdrv_pcl_ctx_t * pctx = hdrv_pcl_state.find_vpc_ctx (); if (!pctx) { TprintfT (DBG_LT0, "hwcdrv: ERROR: hwcdrv_sighlr_restart: find_vpc_ctx()==NULL\n"); return -1; } counter_state_t * ctr_list = (counter_state_t *) pctx->ctr_list; if (!ctr_list) { TprintfT (DBG_LT0, "hwcdrv: WARNING: hwcdrv_sighlr_restart: ctr_list is NULL\n"); return -1; } int errors = 0; for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) { if (ctr_list[ii].needs_restart) errors |= internal_hwc_start (ctr_list[ii].fd); ctr_list[ii].needs_restart = 0; } return errors; #else return 0; #endif } /* create counters based on hwcdef[] */ HWCDRV_API int hwcdrv_create_counters (unsigned hwcdef_cnt, Hwcentry *hwcdef) { if (hwcdef_cnt > hdrv_pcl_about.cpcN_npics) { logerr (GTXT ("More than %d counters were specified\n"), hdrv_pcl_about.cpcN_npics); /*!*/ return HWCFUNCS_ERROR_HWCARGS; } if (hdrv_pcl_about.cpcN_cpuver == CPUVER_UNDEFINED) { logerr (GTXT ("Processor not supported\n")); return HWCFUNCS_ERROR_HWCARGS; } /* add counters */ for (unsigned idx = 0; idx < hwcdef_cnt; idx++) { perf_event_def_t *glb_event_def = &global_perf_event_def[idx]; memset (glb_event_def, 0, sizeof (perf_event_def_t)); unsigned int pmc_sel; eventsel_t evntsel; if (hwcfuncs_get_x86_eventsel (hwcdef[idx].reg_num, hwcdef[idx].int_name, &evntsel, &pmc_sel)) { TprintfT (0, "hwcdrv: ERROR: hwcfuncs_get_x86_eventsel() failed\n"); return HWCFUNCS_ERROR_HWCARGS; } glb_event_def->reg_num = pmc_sel; glb_event_def->eventsel = evntsel; glb_event_def->counter_preload = hwcdef[idx].val; glb_event_def->min_time = hwcdef[idx].min_time; glb_event_def->name = strdup (hwcdef[idx].name); // memory leak??? 
very minor init_perf_event (&glb_event_def->hw, glb_event_def->eventsel, glb_event_def->counter_preload); TprintfT (DBG_LT1, "hwcdrv: create_counters: pic=%u name='%s' interval=%lld" "(min_time=%lld): reg_num=0x%x eventsel=0x%llx ireset=%lld usr=%lld sys=%lld\n", idx, hwcdef[idx].int_name, (long long) glb_event_def->counter_preload, (long long) glb_event_def->min_time, (int) glb_event_def->reg_num, (long long) glb_event_def->eventsel, (long long) HW_INTERVAL_PRESET (hwcdef[idx].val), (long long) glb_event_def->hw.exclude_user, (long long) glb_event_def->hw.exclude_kernel); } hdrv_pcl_state.hwcdef_cnt = hwcdef_cnt; return 0; } HWCDRV_API int hwcdrv_free_counters () // note: only performs shutdown for this thread { hdrv_pcl_ctx_t * pctx; if (!COUNTERS_ENABLED ()) return 0; pctx = hdrv_pcl_state.find_vpc_ctx (); if (!pctx) { TprintfT (0, "hwcdrv: WARNING: hwcdrv_free_counters: tsd context is NULL\n"); return HWCFUNCS_ERROR_GENERIC; } counter_state_t *ctr_list = pctx->ctr_list; if (!ctr_list) { // fork child: prolog suspends hwcs, then epilog frees them TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_free_counters: ctr_list is already NULL\n"); return 0; } int hwc_rc = 0; for (int ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) if (stop_one_ctr (ii, ctr_list)) hwc_rc = HWCFUNCS_ERROR_GENERIC; TprintfT (DBG_LT1, "hwcdrv: hwcdrv_free_counters(tid=0x%lx).\n", (long) pctx->tid); pctx->ctr_list = NULL; return hwc_rc; } HWCDRV_API int hwcdrv_start (void) /* must be called from each thread ? */ { hdrv_pcl_ctx_t *pctx = NULL; if (!COUNTERS_ENABLED ()) { TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_start: no counters to start \n"); return 0; } if (!hdrv_pcl_state.library_ok) { TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: library is not open\n"); return HWCFUNCS_ERROR_NOT_SUPPORTED; } /* * set up per-thread context */ pctx = hdrv_pcl_state.find_vpc_ctx (); if (!pctx) { TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: tsd context is NULL\n"); return HWCFUNCS_ERROR_UNEXPECTED; } pctx->tid = hwcdrv_gettid (); TprintfT (DBG_LT1, "hwcdrv: hwcdrv_start(tid=0x%lx)\n", (long) pctx->tid); /* * create per-thread counter list */ counter_state_t *ctr_list = (counter_state_t *) calloc (hdrv_pcl_state.hwcdef_cnt, sizeof (counter_state_t)); if (!ctr_list) { TprintfT (0, "hwcdrv: ERROR: hwcdrv_start: calloc(ctr_list) failed\n"); return HWCFUNCS_ERROR_MEMORY; } int ii; for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) ctr_list[ii].fd = -1; // invalidate fds in case we have to close prematurely pctx->ctr_list = ctr_list; /* * bind the counters */ size_t pgsz = sysconf (_SC_PAGESIZE); for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) { ctr_list[ii].last_overflow_period = global_perf_event_def[ii].hw.sample_period; if (start_one_ctr (ii, pgsz, pctx, "hwcdrv: ERROR: hwcdrv_start:")) goto hwcdrv_start_cleanup; } /* * start the counters */ for (ii = 0; ii < hdrv_pcl_state.hwcdef_cnt; ii++) { int rc = internal_hwc_start (ctr_list[ii].fd); if (rc < 0) goto hwcdrv_start_cleanup; } return 0; hwcdrv_start_cleanup: hwcdrv_free_counters (); // PERF_EVENT_IOC_DISABLE and close() for all fds return HWCFUNCS_ERROR_UNAVAIL; } HWCDRV_API int hwcdrv_lwp_suspend (void) /* must be called from each thread */ { if (!COUNTERS_ENABLED ()) { TprintfT (DBG_LT1, "hwcdrv: WARNING: hwcdrv_lwp_suspend: no counters\n"); return 0; } TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_suspend()\n"); return hwcdrv_free_counters (); } HWCDRV_API int hwcdrv_lwp_resume (void) /* must be called from each thread */ { if (!COUNTERS_ENABLED ()) { TprintfT (DBG_LT1, "hwcdrv: WARNING: 
hwcdrv_lwp_resume: no counters\n");
      return 0;
    }
  TprintfT (DBG_LT1, "hwcdrv: hwcdrv_lwp_resume()\n");
  return hwcdrv_start ();
}

HWCDRV_API int
hwcdrv_read_events (hwc_event_t *overflow_data, hwc_event_samples_t *sampled_data)
{
  overflow_data->ce_hrt = 0;
  for (int i = 0; i < MAX_PICS; i++)
    {
      overflow_data->ce_pic[i] = 0;
      if (sampled_data)
        HWCFUNCS_SAMPLE_RESET (&sampled_data->sample[i]);
    }
  return 0;
}

/*---------------------------------------------------------------------------*/
/* HWCDRV_API */

hwcdrv_api_t hwcdrv_pcl_api = {
  hwcdrv_init,
  hwcdrv_get_info,
  hwcdrv_enable_mt,
  hwcdrv_get_descriptions,
  hwcdrv_assign_regnos,
  hwcdrv_create_counters,
  hwcdrv_start,
  hwcdrv_overflow,
  hwcdrv_read_events,
  hwcdrv_sighlr_restart,
  hwcdrv_lwp_suspend,
  hwcdrv_lwp_resume,
  hwcdrv_free_counters,
  hwcdrv_lwp_init,
  hwcdrv_lwp_fini,
  -1 // hwcdrv_init_status
};
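
/* Illustrative call sequence (a sketch only; argument names below are
   placeholders, and the real protocol is owned by the caller that reaches
   these static functions through the hwcdrv_pcl_api table above):

     hwcdrv_init (abort_fn, &tsd_sz);      // probe perf_event, pick a pcbe backend
     hwcdrv_assign_regnos (entries, n);    // resolve REGNO_ANY register bindings
     hwcdrv_create_counters (n, hwcdef);   // fill global_perf_event_def[]
     hwcdrv_start ();                      // per thread: perf_event_open, mmap, IOC_REFRESH
     ...                                   // in the SIGIO handler:
     hwcdrv_overflow (si, &ev, &lost);     //   drain the ring buffers, restart counters
     hwcdrv_sighlr_restart (&ev);          //   currently a no-op (see above)
     hwcdrv_free_counters ();              // per-thread teardown  */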