diff options
Diffstat (limited to 'gprofng/src/CallStack.cc')
-rw-r--r-- | gprofng/src/CallStack.cc | 1250 |
1 files changed, 1250 insertions, 0 deletions
diff --git a/gprofng/src/CallStack.cc b/gprofng/src/CallStack.cc new file mode 100644 index 0000000..7671f9f --- /dev/null +++ b/gprofng/src/CallStack.cc @@ -0,0 +1,1250 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include "config.h" +#include <new> + +#include "util.h" +#include "CacheMap.h" +#include "CallStack.h" +#include "DbeSession.h" +#include "DbeView.h" +#include "DbeLinkList.h" +#include "Experiment.h" +#include "Exp_Layout.h" +#include "Function.h" +#include "LoadObject.h" +#include "Module.h" + +Descendants::Descendants () +{ + count = 0; + limit = sizeof (first_data) / sizeof (CallStackNode *); + data = first_data; +} + +Descendants::~Descendants () +{ + if (data != first_data) + free (data); +} + +CallStackNode * +Descendants::find (Histable *hi, int *index) +{ + int cnt = count; + int left = 0; + for (int right = cnt - 1; left <= right;) + { + int ind = (left + right) / 2; + CallStackNode *node = data[ind]; + Histable *instr = node->get_instr (); + if (instr == hi) + { + if (index) + *index = ind; + return node; + } + if (instr->id < hi->id) + right = ind - 1; + else + left = ind + 1; + } + if (index) + *index = left; + return NULL; +} + +void +Descendants::append (CallStackNode* item) +{ + if (count < limit) + data[count++] = item; + else + insert (count, item); +} + +void +Descendants::insert (int ind, CallStackNode* item) +{ + CallStackNode **old_data = data; + int old_cnt = count; + if (old_cnt + 1 >= limit) + { + int new_limit = (limit == 0) ? DELTA : limit * 2; + CallStackNode **new_data = (CallStackNode **) malloc (new_limit * sizeof (CallStackNode *)); + for (int i = 0; i < ind; i++) + new_data[i] = old_data[i]; + new_data[ind] = item; + for (int i = ind; i < old_cnt; i++) + new_data[i + 1] = old_data[i]; + limit = new_limit; + data = new_data; + if (old_data != first_data) + free (old_data); + } + else + { + for (int i = ind; i < old_cnt; i++) + old_data[i + 1] = old_data[i]; + old_data[ind] = item; + } + count++; +} + +/* + * Private implementation of CallStack interface + */ + +// When performing pipeline optimization on resolve_frame_info + add_stack +// cstk_ctx structure contains the state (or context) for one iteration to pass on +// from Phase 2 to Phase 3 (More details in Experiment.cc) +class CallStackP : public CallStack +{ +public: + CallStackP (Experiment *exp); + + virtual ~CallStackP (); + + virtual void add_stack (DataDescriptor *dDscr, long idx, FramePacket *frp, cstk_ctx_chunk *cstCtxChunk); + virtual void *add_stack (Vector<Histable*> *objs); + virtual CallStackNode *get_node (int n); + virtual void print (FILE *); + +private: + + static const int CHUNKSZ = 16384; + + Experiment *experiment; + CallStackNode *root; + CallStackNode *jvm_node; + int nodes; + int nchunks; + CallStackNode **chunks; + Map<uint64_t, CallStackNode *> *cstackMap; + DbeLock *cstackLock; + + CallStackNode *add_stack (long start, long end, Vector<Histable*> *objs, CallStackNode *myRoot); + CallStackNode *new_Node (CallStackNode*, Histable*); + CallStackNode *find_preg_stack (uint64_t); + // objs are in the root..leaf order + void *add_stack_d (Vector<Histable*> *objs); + void add_stack_java (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, bool natpc_added, cstk_ctx_chunk *cstCtxChunk); + void add_stack_java_epilogue (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, Vector<Histable*>* jpcs, bool natpc_added); + + // Adjust HW counter event to find better trigger PC, etc. + DbeInstr *adjustEvent (DbeInstr *leafPC, DbeInstr * candPC, + Vaddr &eventEA, int abst_type); + Vector<DbeInstr*> *natpcsP; + Vector<Histable*> *jpcsP; +}; + +CallStackP::CallStackP (Experiment *exp) +{ + experiment = exp; + nchunks = 0; + chunks = NULL; + nodes = 0; + cstackMap = new CacheMap<uint64_t, CallStackNode *>; + cstackLock = new DbeLock (); + Function *total = dbeSession->get_Total_Function (); + root = new_Node (0, total->find_dbeinstr (0, 0)); + jvm_node = NULL; + natpcsP = NULL; + jpcsP = NULL; +} + +CallStackP::~CallStackP () +{ + delete cstackLock; + if (chunks) + { + for (int i = 0; i < nodes; i++) + { + CallStackNode *node = get_node (i); + node->~CallStackNode (); + } + for (int i = 0; i < nchunks; i++) + free (chunks[i]); + free (chunks); + } + delete natpcsP; + delete jpcsP; + destroy_map (CallStackNode *, cstackMap); +} + +CallStackNode * +CallStackP::new_Node (CallStackNode *anc, Histable *pcval) +{ + // cstackLock->aquireLock(); // Caller already locked it + if (nodes >= nchunks * CHUNKSZ) + { + CallStackNode **old_chunks = chunks; + nchunks++; + + // Reallocate Node chunk array + chunks = (CallStackNode **) malloc (nchunks * sizeof (CallStackNode *)); + for (int i = 0; i < nchunks - 1; i++) + chunks[i] = old_chunks[i]; + free (old_chunks); + // Allocate new chunk for nodes. + chunks[nchunks - 1] = (CallStackNode *) malloc (CHUNKSZ * sizeof (CallStackNode)); + } + nodes++; + CallStackNode *node = get_node (nodes - 1); + new (node) CallStackNode (anc, pcval); + // cstackLock->releaseLock(); + return node; +} + +CallStackNode * +CallStackP::find_preg_stack (uint64_t prid) +{ + DataView *dview = experiment->getOpenMPdata (); + dview->sort (PROP_CPRID); + Datum tval; + tval.setUINT64 (prid); + long idx = dview->getIdxByVals (&tval, DataView::REL_EQ); + if (idx < 0) + return root; + CallStackNode *node = (CallStackNode*) dview->getObjValue (PROP_USTACK, idx); + if (node != NULL) + return node; + uint64_t pprid = dview->getLongValue (PROP_PPRID, idx); + if (pprid == prid) + return root; + void *nat_stack = dview->getObjValue (PROP_MSTACK, idx); + Vector<Histable*> *pcs = getStackPCs (nat_stack); + + // Find the bottom frame + int btm; + bool inOMP = false; + DbeInstr *instr; + Histable *hist; + for (btm = 0; btm < pcs->size (); btm++) + { + hist = pcs->fetch (btm); + if (hist->get_type () == Histable::INSTR) + instr = (DbeInstr *) hist; + else // DBELINE + instr = (DbeInstr *) hist->convertto (Histable::INSTR); + LoadObject *lo = instr->func->module->loadobject; + if (!inOMP) + { + if (lo->flags & SEG_FLAG_OMP) + inOMP = true; + } + else if (!(lo->flags & SEG_FLAG_OMP)) + break; + } + + // Find the top frame + dview->sort (PROP_CPRID); + int top; + tval.setUINT64 (pprid); + long pidx = dview->getIdxByVals (&tval, DataView::REL_EQ); + if (pidx < 0) // No parent. Process the entire nat_stack + top = pcs->size () - 1; + else + { + uint32_t thrid = (uint32_t) dview->getIntValue (PROP_THRID, idx); + uint32_t pthrid = (uint32_t) dview->getIntValue (PROP_THRID, pidx); + if (thrid != pthrid) + { + // Parent is on a different stack. + // Process the entire nat_stack. Skip libthread. + for (top = pcs->size () - 1; top >= 0; top--) + { + hist = pcs->fetch (top); + if (hist->get_type () == Histable::INSTR) + instr = (DbeInstr *) hist; + else // DBELINE + instr = (DbeInstr *) hist->convertto (Histable::INSTR); + if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) + break; + } + if (top < 0) // None found. May be incomplete call stack (x86) + top = pcs->size () - 1; + } + else + { + // Parent is on the same stack. Find match. + top = pcs->size () - 1; + void *pnat_stack = dview->getObjValue (PROP_MSTACK, pidx); + Vector<Histable*> *ppcs = getStackPCs (pnat_stack); + for (int ptop = ppcs->size () - 1; top >= 0 && ptop >= 0; + top--, ptop--) + { + if (pcs->fetch (top) != ppcs->fetch (ptop)) + break; + } + delete ppcs; + } + } + + // Process the found range + Vector<Histable*> *upcs = new Vector<Histable*>(128); + for (int i = btm; i <= top; ++i) + { + hist = (DbeInstr*) pcs->fetch (i); + if (hist->get_type () == Histable::INSTR) + instr = (DbeInstr *) hist; + else // DBELINE + instr = (DbeInstr *) hist->convertto (Histable::INSTR); + + if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) + // Skip all frames from libmtsk + continue; + upcs->append (instr); + } + delete pcs; + node = find_preg_stack (pprid); + while (node != root) + { + upcs->append (node->instr); + node = node->ancestor; + } + node = (CallStackNode *) add_stack (upcs); + dview->setObjValue (PROP_USTACK, idx, node); + delete upcs; + return node; +} + +#define JNI_MARKER -3 + +// This is one iteration if the third stage of +// resolve_frame_info + add_stack pipeline. Works on building the java +// stacks +void +CallStackP::add_stack_java (DataDescriptor *dDscr, long idx, FramePacket *frp, + hrtime_t tstamp, uint32_t thrid, + Vector<DbeInstr*>* natpcs, bool natpc_added, + cstk_ctx_chunk *cstCtxChunk) +{ + Vector<Histable*> *jpcs = NULL; + cstk_ctx *cstctx = NULL; + if (cstCtxChunk != NULL) + { + cstctx = cstCtxChunk->cstCtxAr[idx % CSTCTX_CHUNK_SZ]; + jpcs = cstctx->jpcs; + jpcs->reset (); + } + if (jpcs == NULL) + { + // this is when we are not doing the pipeline optimization + // Temporary array for resolved addresses + // [leaf_pc .. root_pc] == [0..stack_size-1] + // Leave room for a possible "truncated" frame + if (jpcsP == NULL) + jpcsP = new Vector<Histable*>; + jpcs = jpcsP; + jpcs->reset (); + } + + // + // Construct the user stack + // + // Construct Java user stack + int jstack_size = frp->stackSize (true); + if (jstack_size) + { + // jpcs = new Vector<Histable*>( jstack_size ); + if (frp->isTruncatedStack (true)) + { + Function *truncf = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc); + jpcs->append (truncf->find_dbeinstr (0, 0)); + } + + int nind = natpcs->size () - 1; // first native frame + for (int jind = jstack_size - 1; jind >= 0; jind--) + { + bool jleaf = (jind == 0); // is current java frame a leaf? + Vaddr mid = frp->getMthdFromStack (jind); + int bci = frp->getBciFromStack (jind); + DbeInstr *cur_instr = experiment->map_jmid_to_PC (mid, bci, tstamp); + jpcs->append (cur_instr); + if (bci == JNI_MARKER) + { + JMethod *j_method = (JMethod*) cur_instr->func; + // Find matching native function on the native stack + bool found = false; + for (; nind >= 0; nind--) + { + DbeInstr *nat_addr = natpcs->fetch (nind); + if (0 == nat_addr) + continue; + Function *nat_func = nat_addr->func; + if (!found && j_method->jni_match (nat_func)) + found = true; + if (found) + { + // XXX omazur: the following will skip JNI native method + // implemented in JVM itself. + // If we are back in JVM switch to processing Java + // frames if there are any. + if ((nat_func->module->loadobject->flags & SEG_FLAG_JVM) && !jleaf) + break; + jpcs->append (nat_addr); + } + } + } + } + } + add_stack_java_epilogue (dDscr, idx, frp, tstamp, thrid, natpcs, jpcs, natpc_added); +} + +// This is one iteration if the fourth stage of +// resolve_frame_info + add_stack pipeline. +// It adds the native and java stacks to the stackmap + +void +CallStackP::add_stack_java_epilogue (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, Vector<Histable*> *jpcs, bool natpc_added) +{ + CallStackNode *node = NULL; + if (!natpc_added) + { + node = (CallStackNode *) add_stack ((Vector<Histable*>*)natpcs); + dDscr->setObjValue (PROP_MSTACK, idx, node); + dDscr->setObjValue (PROP_XSTACK, idx, node); + dDscr->setObjValue (PROP_USTACK, idx, node); + } + + int jstack_size = frp->stackSize (true); + if (jstack_size) + { + if (jpcs != NULL) + node = (CallStackNode *) add_stack_d (jpcs); + if (node == NULL) + node = (CallStackNode*) dDscr->getObjValue (PROP_USTACK, idx); + dDscr->setObjValue (PROP_USTACK, idx, node); + Function *func = (Function*) node->instr->convertto (Histable::FUNCTION); + if (func != dbeSession->get_JUnknown_Function ()) + dDscr->setObjValue (PROP_XSTACK, idx, node); + } + + JThread *jthread = experiment->map_pckt_to_Jthread (thrid, tstamp); + if (jthread == JTHREAD_NONE && jstack_size != 0 && node != NULL) + { + Function *func = (Function*) node->instr->convertto (Histable::FUNCTION); + if (func != dbeSession->get_JUnknown_Function ()) + jthread = JTHREAD_DEFAULT; + } + dDscr->setObjValue (PROP_JTHREAD, idx, jthread); + if (jthread == JTHREAD_NONE || (jthread != JTHREAD_DEFAULT && jthread->is_system ())) + { + if (jvm_node == NULL) + { + Function *jvm = dbeSession->get_jvm_Function (); + if (jvm) + { + jvm_node = new_Node (root, jvm->find_dbeinstr (0, 0)); + CommonPacket::jvm_overhead = jvm_node; + } + } + dDscr->setObjValue (PROP_USTACK, idx, jvm_node); + } +} + +// This is one iteration of the 2nd stage of +// resolve_frame_info + add_stack() pipeline. Builds the stack for a given framepacket. +// When pipeline optimization is turnd off, cstctxchunk passed is NULL +void +CallStackP::add_stack (DataDescriptor *dDscr, long idx, FramePacket *frp, + cstk_ctx_chunk* cstCtxChunk) +{ + Vector<DbeInstr*> *natpcs = NULL; + cstk_ctx *cstctx = NULL; + int stack_size = frp->stackSize (); + if (cstCtxChunk != NULL) + { + cstctx = cstCtxChunk->cstCtxAr[idx % CSTCTX_CHUNK_SZ]; + natpcs = cstctx->natpcs; + natpcs->reset (); + } + if (natpcs == NULL) + { + // this is when we are not doing the pipeline optimization + // Temporary array for resolved addresses + // [leaf_pc .. root_pc] == [0..stack_size-1] + // Leave room for a possible "truncated" frame + if (natpcsP == NULL) + natpcsP = new Vector<DbeInstr*>; + natpcs = natpcsP; + natpcs->reset (); + } + + bool leaf = true; + hrtime_t tstamp = (hrtime_t) dDscr->getLongValue (PROP_TSTAMP, idx); + uint32_t thrid = (uint32_t) dDscr->getIntValue (PROP_THRID, idx); + + enum + { + NONE, + CHECK_O7, + USE_O7, + SKIP_O7 + } state = NONE; + + Vaddr o7_to_skip = 0; + for (int index = 0; index < stack_size; index++) + { + if (frp->isLeafMark (index)) + { + state = CHECK_O7; + continue; + } + + if (state == SKIP_O7) + { + // remember this bad o7 value since OMP might not recognize it + o7_to_skip = frp->getFromStack (index); + state = NONE; + continue; + } + + Vaddr va = frp->getFromStack (index); + DbeInstr *cur_instr = experiment->map_Vaddr_to_PC (va, tstamp); +#if ARCH(Intel)// TBR? FIXUP_XXX_SPARC_LINUX: switch should be on experiment ARCH, not dbe ARCH + // We need to adjust return addresses on intel + // in order to attribute inclusive metrics to + // proper call instructions. + if (experiment->exp_maj_version <= 9) + if (!leaf && cur_instr->addr != 0) + cur_instr = cur_instr->func->find_dbeinstr (0, cur_instr->addr - 1); +#endif + + // Skip PC's from PLT, update leaf and state accordingly + if ((cur_instr->func->flags & FUNC_FLAG_PLT) + && (leaf || state == CHECK_O7)) + { + if (state == CHECK_O7) + state = USE_O7; + leaf = false; + continue; + } + if (state == CHECK_O7) + { + state = USE_O7; + uint64_t saddr = cur_instr->func->save_addr; + if (cur_instr->func->isOutlineFunction) + // outline functions assume 'save' instruction + // Note: they accidentally have saddr == FUNC_ROOT + state = SKIP_O7; + else if (saddr == FUNC_ROOT) + { + // If a function is statically determined as a root + // but dynamically appears not, don't discard o7. + // One such case is __misalign_trap_handler on sparcv9. + if (stack_size == 3) + state = SKIP_O7; + } + else if (saddr != FUNC_NO_SAVE && cur_instr->addr > saddr) + state = SKIP_O7; + } + else if (state == USE_O7) + { + state = NONE; + if (cur_instr->flags & PCInvlFlag) + continue; + } + if (leaf) + { + Vaddr evpc = (Vaddr) dDscr->getLongValue (PROP_VIRTPC, idx); + if (evpc != 0 + && !(index > 0 && frp->isLeafMark (index - 1) + && evpc == (Vaddr) (-1))) + { + /* contains hwcprof info */ + cur_instr->func->module->read_hwcprof_info (); + + // complete ABS validation of candidate eventPC/eventEA + // and correction/adjustment of collected callstack leaf PC + DbeInstr *candPC = experiment->map_Vaddr_to_PC (evpc, tstamp); + Vaddr vaddr = (Vaddr) dDscr->getLongValue (PROP_VADDR, idx); + Vaddr tmp_vaddr = vaddr; + int abst_type; + uint32_t tag = dDscr->getIntValue (PROP_HWCTAG, idx); + if (tag < 0 || tag >= MAX_HWCOUNT) + abst_type = ABST_NOPC; + else + abst_type = experiment->coll_params.hw_tpc[tag]; + + // We need to adjust addresses for ABST_EXACT_PEBS_PLUS1 + // (Nehalem/SandyBridge PEBS identifies PC+1, not PC) + if (abst_type == ABST_EXACT_PEBS_PLUS1 && candPC->addr != 0) + candPC = candPC->func->find_dbeinstr (0, candPC->func->find_previous_addr (candPC->addr)); + + cur_instr = adjustEvent (cur_instr, candPC, tmp_vaddr, abst_type); + if (vaddr != tmp_vaddr) + { + if (tmp_vaddr < ABS_CODE_RANGE) + { + /* post processing backtrack failed */ + dDscr->setValue (PROP_VADDR, idx, tmp_vaddr); + dDscr->setValue (PROP_PADDR, idx, ABS_NULL); + /* hwcp->eventVPC = xxxxx leave eventPC alone, + * or can we set it to leafpc? */ + dDscr->setValue (PROP_PHYSPC, idx, ABS_NULL); + } + else + { + /* internal error: why would post-processing modify vaddr? */ + dDscr->setValue (PROP_PADDR, idx, (Vaddr) (-1)); + dDscr->setValue (PROP_PHYSPC, idx, (Vaddr) (-1)); + } + } + } + } + natpcs->append (cur_instr); + leaf = false; + + // A hack to deceive the user into believing that outlined code + // is called from the base function + DbeInstr *drvd = cur_instr->func->derivedNode; + if (drvd != NULL) + natpcs->append (drvd); + } + if (frp->isTruncatedStack ()) + { + Function *truncf = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc); + natpcs->append (truncf->find_dbeinstr (0, 0)); + } + else if (frp->isFailedUnwindStack ()) + { + Function *funwf = dbeSession->getSpecialFunction (DbeSession::FailedUnwindFunc); + natpcs->append (funwf->find_dbeinstr (0, 0)); + } + + CallStackNode *node = (CallStackNode*) add_stack ((Vector<Histable*>*)natpcs); + dDscr->setObjValue (PROP_MSTACK, idx, node); + dDscr->setObjValue (PROP_XSTACK, idx, node); + dDscr->setObjValue (PROP_USTACK, idx, node); + + // OpenMP 3.0 stacks + stack_size = frp->ompstack->size (); + if (stack_size > 0 || frp->omp_state == OMP_IDLE_STATE) + { + Function *func; + Vector<Histable*> *omppcs = new Vector<Histable*>(stack_size); + Vector<Histable*> *ompxpcs = new Vector<Histable*>(stack_size); + switch (frp->omp_state) + { + case OMP_IDLE_STATE: + case OMP_RDUC_STATE: + case OMP_IBAR_STATE: + case OMP_EBAR_STATE: + case OMP_LKWT_STATE: + case OMP_CTWT_STATE: + case OMP_ODWT_STATE: + case OMP_ATWT_STATE: + { + func = dbeSession->get_OMP_Function (frp->omp_state); + DbeInstr *instr = func->find_dbeinstr (0, 0); + omppcs->append (instr); + ompxpcs->append (instr); + break; + } + } + Vector<Vaddr> *stck = frp->ompstack; + leaf = true; + for (int index = 0; index < stack_size; index++) + { + if (stck->fetch (index) == SP_LEAF_CHECK_MARKER) + { + state = CHECK_O7; + continue; + } + if (state == SKIP_O7) + { + state = NONE; + continue; + } + + // The OMP stack might not have enough information to know to discard a bad o7. + // So just remember what the native stack skipped. + if (o7_to_skip == stck->fetch (index)) + { + state = NONE; + continue; + } + Vaddr va = stck->fetch (index); + DbeInstr *cur_instr = experiment->map_Vaddr_to_PC (va, tstamp); + + // Skip PC's from PLT, update leaf and state accordingly + if ((cur_instr->func->flags & FUNC_FLAG_PLT) && + (leaf || state == CHECK_O7)) + { + if (state == CHECK_O7) + state = USE_O7; + leaf = false; + continue; + } + if (state == CHECK_O7) + { + state = USE_O7; + uint64_t saddr = cur_instr->func->save_addr; + if (cur_instr->func->isOutlineFunction) + // outline functions assume 'save' instruction + // Note: they accidentally have saddr == FUNC_ROOT + state = SKIP_O7; + else if (saddr == FUNC_ROOT) + { + // If a function is statically determined as a root + // but dynamically appears not, don't discard o7. + // One such case is __misalign_trap_handler on sparcv9. + if (stack_size == 3) + state = SKIP_O7; + } + else if (saddr != FUNC_NO_SAVE && cur_instr->addr > saddr) + state = SKIP_O7; + } + else if (state == USE_O7) + { + state = NONE; + if (cur_instr->flags & PCInvlFlag) + continue; + } + + DbeLine *dbeline = (DbeLine*) cur_instr->convertto (Histable::LINE); + if (cur_instr->func->usrfunc) + { + dbeline = dbeline->sourceFile->find_dbeline (cur_instr->func->usrfunc, dbeline->lineno); + omppcs->append (dbeline); + } + else if (dbeline->lineno > 0) + omppcs->append (dbeline); + else + omppcs->append (cur_instr); + if (dbeline->is_set (DbeLine::OMPPRAGMA) && + frp->omp_state == OMP_WORK_STATE) + dDscr->setValue (PROP_OMPSTATE, idx, OMP_OVHD_STATE); + ompxpcs->append (cur_instr); + leaf = false; + } + if (frp->omptruncated == SP_TRUNC_STACK_MARKER) + { + func = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc); + DbeInstr *instr = func->find_dbeinstr (0, 0); + omppcs->append (instr); + ompxpcs->append (instr); + } + else if (frp->omptruncated == SP_FAILED_UNWIND_MARKER) + { + func = dbeSession->getSpecialFunction (DbeSession::FailedUnwindFunc); + DbeInstr *instr = func->find_dbeinstr (0, 0); + omppcs->append (instr); + ompxpcs->append (instr); + } + + // User model call stack + node = (CallStackNode*) add_stack (omppcs); + dDscr->setObjValue (PROP_USTACK, idx, node); + delete omppcs; + + // Expert call stack + node = (CallStackNode*) add_stack (ompxpcs); + dDscr->setObjValue (PROP_XSTACK, idx, node); + delete ompxpcs; + dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); + return; + } + + // OpenMP 2.5 stacks + if (frp->omp_cprid || frp->omp_state) + { + DataView *dview = experiment->getOpenMPdata (); + if (dview == NULL) + { + // It appears we may get OMP_SERL_STATE from a passive libmtsk + dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); + return; + } + if (dview->getDataDescriptor () == dDscr) + { + // Don't process the user stack for OpenMP fork events yet + dDscr->setObjValue (PROP_USTACK, idx, (void*) NULL); + dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); + return; + } + Vector<Histable*> *omppcs = new Vector<Histable*>(stack_size); + + // Construct OMP user stack + // Find the bottom frame + int btm = 0; + switch (frp->omp_state) + { + case OMP_IDLE_STATE: + { + Function *func = dbeSession->get_OMP_Function (frp->omp_state); + omppcs->append (func->find_dbeinstr (0, 0)); + // XXX: workaround for inconsistency between OMP_IDLE_STATE + // and omp_cprid != 0 + frp->omp_cprid = 0; + btm = natpcs->size (); + break; + } + case OMP_RDUC_STATE: + case OMP_IBAR_STATE: + case OMP_EBAR_STATE: + case OMP_LKWT_STATE: + case OMP_CTWT_STATE: + case OMP_ODWT_STATE: + case OMP_ATWT_STATE: + { + Function *func = dbeSession->get_OMP_Function (frp->omp_state); + omppcs->append (func->find_dbeinstr (0, 0)); + bool inOMP = false; + for (btm = 0; btm < natpcs->size (); btm++) + { + LoadObject *lo = natpcs->fetch (btm)->func->module->loadobject; + if (!inOMP) + { + if (lo->flags & SEG_FLAG_OMP) + inOMP = true; + } + else if (!(lo->flags & SEG_FLAG_OMP)) + break; + } + break; + } + case OMP_NO_STATE: + case OMP_WORK_STATE: + case OMP_SERL_STATE: + default: + break; + } + + // Find the top frame + int top = -1; + switch (frp->omp_state) + { + case OMP_IDLE_STATE: + break; + default: + { + dview->sort (PROP_CPRID); + Datum tval; + tval.setUINT64 (frp->omp_cprid); + long pidx = dview->getIdxByVals (&tval, DataView::REL_EQ); + if (pidx < 0) // No parent. Process the entire nat_stack + top = natpcs->size () - 1; + else + { + uint32_t pthrid = (uint32_t) dview->getIntValue (PROP_THRID, pidx); + if (thrid != pthrid) + { + // Parent is on a different stack. + // Process the entire nat_stack. Skip libthread. + for (top = natpcs->size () - 1; top >= 0; top--) + { + DbeInstr *instr = natpcs->fetch (top); + if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) + break; + } + if (top < 0) // None found. May be incomplete call stack + top = natpcs->size () - 1; + } + else + { + // Parent is on the same stack. Find match. + top = natpcs->size () - 1; + void *pnat_stack = dview->getObjValue (PROP_MSTACK, pidx); + Vector<Histable*> *ppcs = getStackPCs (pnat_stack); + for (int ptop = ppcs->size () - 1; top >= 0 && ptop >= 0; + top--, ptop--) + { + if (natpcs->fetch (top) != ppcs->fetch (ptop)) + break; + } + delete ppcs; + } + } + // If no frames are found for Barrier/Reduction save at least one + if ((frp->omp_state == OMP_RDUC_STATE + || frp->omp_state == OMP_IBAR_STATE + || frp->omp_state == OMP_EBAR_STATE) + && top < btm && btm < natpcs->size ()) + top = btm; + } + } + for (int i = btm; i <= top; ++i) + { + DbeInstr *instr = natpcs->fetch (i); + if (instr->func->module->loadobject->flags & SEG_FLAG_OMP) + continue; // Skip all frames from libmtsk + omppcs->append (instr); + } + node = find_preg_stack (frp->omp_cprid); + while (node != root) + { + omppcs->append (node->instr); + node = node->ancestor; + } + node = (CallStackNode *) add_stack (omppcs); + dDscr->setObjValue (PROP_USTACK, idx, node); + delete omppcs; + dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT); + return; + } + + // Construct Java user stack + add_stack_java (dDscr, idx, frp, tstamp, thrid, natpcs, true, NULL); +} + +// adjustment of leafPC/eventVA for XHWC packets with candidate eventPC +// Called from CallStack during initial processing of the events +DbeInstr * +CallStackP::adjustEvent (DbeInstr *leafPC, DbeInstr *candPC, Vaddr &eventVA, + int abst_type) +{ + // increment counter of dataspace events + experiment->dsevents++; + bool isPrecise; + if (abst_type == ABST_EXACT_PEBS_PLUS1) + isPrecise = true; + else if (abst_type == ABST_EXACT) + isPrecise = true; + else + isPrecise = false; + + if (isPrecise) + /* precise backtracking */ + /* assume within 1 instruction of leaf (this could be checked here) */ + // no change to eventVA or candPC + return candPC; + + Function *func = leafPC->func; + unsigned int bt_entries = func->module->bTargets.size (); + DbeInstr *bestPC = NULL; + + // bt == branch target (potential destination of a branch + if (bt_entries == 0) + { // no XHWCprof info for this module + // increment counter + experiment->dsnoxhwcevents++; + + // see if event is to be processed anyway + if (!dbeSession->check_ignore_no_xhwcprof ()) + { + // Don't ignore error + // XXX -- set error code in event VA -- replace with other mechanism + if (eventVA > ABS_CODE_RANGE) + eventVA = ABS_NULL; + eventVA |= ABS_NO_CTI_INFO; // => effective address can't be validated + bestPC = leafPC; // => no PC correction possible + } + else + bestPC = candPC; // assume the event valid + } + else + { + // we have the info to verify the backtracking + target_info_t *bt; + int bt_entry = bt_entries; + uint64_t leafPC_offset = func->img_offset + leafPC->addr; + uint64_t candPC_offset = candPC->func->img_offset + candPC->addr; + do + { + bt_entry--; + bt = func->module->bTargets.fetch (bt_entry); + /* bts seem to be sorted by offset, smallest to largest */ + } + while (bt_entry > 0 && bt->offset > leafPC_offset); + /* if bt_entry == 0, all items have been checked */ + + if (bt->offset > leafPC_offset) + { /* XXXX isn't is possible that all bt's are after leafPC_offset? */ + bestPC = leafPC; // actual event PC can't be determined + if (eventVA > ABS_CODE_RANGE) + eventVA = ABS_NULL; + eventVA |= ABS_INFO_FAILED; // effective address can't be validated + } + else if (bt->offset > candPC_offset) + { + // use synthetic PC corresponding to bTarget + bestPC = func->find_dbeinstr (PCTrgtFlag, bt->offset - func->img_offset); + if (eventVA > ABS_CODE_RANGE) + eventVA = ABS_NULL; + eventVA |= ABS_CTI_TARGET; // effective address can't be validated + } + else + bestPC = candPC; // accept provided virtual address as valid + } + return bestPC; +} + +void * +CallStackP::add_stack_d (Vector<Histable*> *objs) +{ + // objs: root..leaf + // Reverse objs + for (int i = 0, j = objs->size () - 1; i < j; ++i, --j) + objs->swap (i, j); + return add_stack (objs); +} + +CallStackNode::CallStackNode (CallStackNode *_ancestor, Histable *_instr) +{ + ancestor = _ancestor; + instr = _instr; + alt_node = NULL; +} + +CallStackNode::~CallStackNode () { } + +bool +CallStackNode::compare (long start, long end, Vector<Histable*> *objs, CallStackNode *mRoot) +{ + CallStackNode *p = this; + for (long i = start; i < end; i++, p = p->get_ancestor ()) + if (p == NULL || p->get_instr () != objs->get (i)) + return false; + return p == mRoot; +} + +void +CallStackNode::dump () +{ + const char *s = ""; + int sz = 0; + for (CallStackNode *p = this; p; p = p->get_ancestor ()) + { + fprintf (stderr, NTXT ("%.*s 0x%08llx id=0x%08llx %s\n"), sz, s, + (long long) p, (long long) p->get_instr ()->id, + STR (p->get_instr ()->get_name ())); + s = "-"; + sz += 1; + } +} + +long total_calls_add_stack, total_stacks, total_nodes, call_stack_size[201]; + +void * +CallStackP::add_stack (Vector<Histable*> *objs) +{ + // objs: leaf..root + uint64_t hash = objs->size (); + for (long i = objs->size () - 1; i >= 0; --i) + hash ^= (unsigned long long) objs->get (i); + + uint64_t key = hash ? hash : 1; + CallStackNode *node = cstackMap->get (key); +#ifdef DEBUG + if (DUMP_CALL_STACK) + { + total_calls_add_stack++; + call_stack_size[objs->size () > 200 ? 200 : objs->size ()]++; + Dprintf (DUMP_CALL_STACK, + "add_stack: %lld size=%lld key=0x%08llx cashNode=0x%08llx\n", + (long long) total_calls_add_stack, (long long) objs->size (), + (long long) key, (long long) node); + for (long i = 0, sz = VecSize (objs); i < sz; i++) + Dprintf (DUMP_CALL_STACK, " add_stack: %.*s 0x%08llx id=0x%08llx %s\n", + (int) i, NTXT (" "), (long long) objs->get (i), + (long long) objs->get (i)->id, STR (objs->get (i)->get_name ())); + } +#endif + if (node && node->compare (0, objs->size (), objs, root)) + { + Dprintf (DUMP_CALL_STACK, NTXT ("STACK FOUND: key=0x%08llx 0x%08llx id=0x%08llx %s\n"), + (long long) key, (long long) node, + (long long) node->get_instr ()->id, + STR (node->get_instr ()->get_name ())); + return node; + } + node = root; + for (long i = objs->size () - 1; i >= 0; i--) + { + Histable *instr = objs->get (i); + int old_count = node->count; + int left; + CallStackNode *nd = node->find (instr, &left); + if (nd) + { + node = nd; + continue; + } + cstackLock->aquireLock (); // Use one lock for all nodes + // node->aquireLock(); + if (old_count != node->count) + { + nd = node->find (instr, &left); + if (nd) + { // the other thread has created this node + cstackLock->releaseLock (); + // node->releaseLock(); + node = nd; + continue; + } + } + // New Call Stack + total_stacks++; + nd = node; + CallStackNode *first = NULL; + do + { + CallStackNode *anc = node; + total_nodes++; + node = new_Node (anc, objs->get (i)); + if (first) + anc->append (node); + else + first = node; + } + while (i-- > 0); + nd->insert (left, first); + cstackLock->releaseLock (); + // nd->releaseLock(); + break; + } + cstackMap->put (key, node); + if (DUMP_CALL_STACK) + node->dump (); + return node; +} + +CallStackNode * +CallStackP::get_node (int n) +{ + if (n < nodes) + return &chunks[n / CHUNKSZ][n % CHUNKSZ]; + return NULL; +} + +/* + * Debugging methods + */ +void +CallStackP::print (FILE *fd) +{ + FILE *f = (fd == NULL ? stderr : fd); + fprintf (f, GTXT ("CallStack: nodes = %d\n\n"), nodes); + int maxdepth = 0; + int maxwidth = 0; + const char *t; + char *n; + for (int i = 0; i < nodes; i++) + { + CallStackNode *node = &chunks[i / CHUNKSZ][i % CHUNKSZ]; + Histable *instr = node->instr; + if (instr->get_type () == Histable::LINE) + { + t = "L"; + n = ((DbeLine *) instr)->func->get_name (); + } + else if (instr->get_type () == Histable::INSTR) + { + t = "I"; + n = ((DbeInstr *) instr)->func->get_name (); + } + else + { + t = "O"; + n = instr->get_name (); + } + long long addr = (long long) instr->get_addr (); + fprintf (f, GTXT ("node: 0x%016llx anc: 0x%016llx -- 0x%016llX: %s %s\n"), + (unsigned long long) node, (unsigned long long) node->ancestor, + addr, t, n); + } + fprintf (f, GTXT ("md = %d, mw = %d\n"), maxdepth, maxwidth); +} + +/* + * Static CallStack methods + */ +CallStack * +CallStack::getInstance (Experiment *exp) +{ + return new CallStackP (exp); +} + +int +CallStack::stackSize (void *stack) +{ + CallStackNode *node = (CallStackNode *) stack; + int sz = 0; + for (; node; node = node->ancestor) + sz++; + return sz - 1; // don't count the root node +} + +Histable * +CallStack::getStackPC (void *stack, int n) +{ + CallStackNode *node = (CallStackNode *) stack; + while (n-- && node) + node = node->ancestor; + if (node == NULL) + return dbeSession->get_Unknown_Function ()->find_dbeinstr (PCInvlFlag, 0); + return node->instr; +} + +Vector<Histable*> * +CallStack::getStackPCs (void *stack, bool get_hide_stack) +{ + Vector<Histable*> *res = new Vector<Histable*>; + CallStackNode *node = (CallStackNode *) stack; + if (get_hide_stack && node->alt_node != NULL) + node = node->alt_node; + while (node && node->ancestor) + { // skip the root node + res->append (node->instr); + node = node->ancestor; + } + return res; +} + +int +CallStack::compare (void *stack1, void *stack2) +{ + // Quick comparision + if (stack1 == stack2) + return 0; + + CallStackNode *node1 = (CallStackNode *) stack1; + CallStackNode *node2 = (CallStackNode *) stack2; + while (node1 != NULL && node2 != NULL) + { + //to keep the result const on different platforms + //we use instr->id instead of instr + if (node1->instr->id < node2->instr->id) + return -1; + else if (node1->instr->id > node2->instr->id) + return 1; + node1 = node1->ancestor; + node2 = node2->ancestor; + } + if (node1 == NULL && node2 != NULL) + return -1; + else if (node1 != NULL && node2 == NULL) + return 1; + else + return 0; +} + +// LIBRARY VISIBILITY + +void +CallStack::setHideStack (void *stack, void *hideStack) +{ + CallStackNode *hNode = (CallStackNode *) stack; + hNode->alt_node = (CallStackNode *) hideStack; +} |