/* Copyright (C) 2021-2024 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

#include "config.h"
#include <new>

#include "util.h"
#include "CacheMap.h"
#include "CallStack.h"
#include "DbeSession.h"
#include "DbeView.h"
#include "DbeLinkList.h"
#include "Experiment.h"
#include "Exp_Layout.h"
#include "Function.h"
#include "LoadObject.h"
#include "Module.h"

Descendants::Descendants ()
{
  count = 0;
  limit = sizeof (first_data) / sizeof (CallStackNode *);
  data = first_data;
}

Descendants::~Descendants ()
{
  if (data != first_data)
    free (data);
}

CallStackNode *
Descendants::find (Histable *hi, int *index)
{
  int cnt = count;
  int left = 0;
  for (int right = cnt - 1; left <= right;)
    {
      int ind = (left + right) / 2;
      CallStackNode *node = data[ind];
      Histable *instr = node->get_instr ();
      if (instr == hi)
	{
	  if (index)
	    *index = ind;
	  return node;
	}
      if (instr->id < hi->id)
	right = ind - 1;
      else
	left = ind + 1;
    }
  if (index)
    *index = left;
  return NULL;
}

void
Descendants::append (CallStackNode* item)
{
  if (count < limit)
    data[count++] = item;
  else
    insert (count, item);
}

void
Descendants::insert (int ind, CallStackNode* item)
{
  CallStackNode **old_data = data;
  int old_cnt = count;
  if (old_cnt + 1 >= limit)
    {
      int new_limit = (limit == 0) ? DELTA : limit * 2;
      CallStackNode **new_data = (CallStackNode **) malloc (new_limit * sizeof (CallStackNode *));
      for (int i = 0; i < ind; i++)
	new_data[i] = old_data[i];
      new_data[ind] = item;
      for (int i = ind; i < old_cnt; i++)
	new_data[i + 1] = old_data[i];
      limit = new_limit;
      data = new_data;
      if (old_data != first_data)
	free (old_data);
    }
  else
    {
      for (int i = ind; i < old_cnt; i++)
	old_data[i + 1] = old_data[i];
      old_data[ind] = item;
    }
  count++;
}

/*
 *    Private implementation of CallStack interface
 */

// When performing pipeline optimization on resolve_frame_info + add_stack
// cstk_ctx structure contains the state (or context) for one iteration to pass on
// from Phase 2 to Phase 3 (More details in Experiment.cc)
class CallStackP : public CallStack
{
public:
  CallStackP (Experiment *exp);

  virtual ~CallStackP ();

  virtual void add_stack (DataDescriptor *dDscr, long idx, FramePacket *frp, cstk_ctx_chunk *cstCtxChunk);
  virtual void *add_stack (Vector<Histable*> *objs);
  virtual CallStackNode *get_node (int n);
  virtual void print (FILE *);

private:

  static const int CHUNKSZ = 16384;

  Experiment *experiment;
  CallStackNode *root;
  CallStackNode *jvm_node;
  int nodes;
  int nchunks;
  CallStackNode **chunks;
  Map<uint64_t, CallStackNode *> *cstackMap;
  DbeLock *cstackLock;

  CallStackNode *add_stack (long start, long end, Vector<Histable*> *objs, CallStackNode *myRoot);
  CallStackNode *new_Node (CallStackNode*, Histable*);
  CallStackNode *find_preg_stack (uint64_t);
  // objs are in the root..leaf order
  void *add_stack_d (Vector<Histable*> *objs);
  void add_stack_java (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, bool natpc_added, cstk_ctx_chunk *cstCtxChunk);
  void add_stack_java_epilogue (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, Vector<Histable*>* jpcs, bool natpc_added);

  // Adjust HW counter event to find better trigger PC, etc.
  DbeInstr *adjustEvent (DbeInstr *leafPC, DbeInstr * candPC,
			 Vaddr &eventEA, int abst_type);
  Vector<DbeInstr*> *natpcsP;
  Vector<Histable*> *jpcsP;
};

CallStackP::CallStackP (Experiment *exp)
{
  experiment = exp;
  nchunks = 0;
  chunks = NULL;
  nodes = 0;
  cstackMap = new CacheMap<uint64_t, CallStackNode *>;
  cstackLock = new DbeLock ();
  Function *total = dbeSession->get_Total_Function ();
  root = new_Node (0, total->find_dbeinstr (0, 0));
  jvm_node = NULL;
  natpcsP = NULL;
  jpcsP = NULL;
}

CallStackP::~CallStackP ()
{
  delete cstackLock;
  if (chunks)
    {
      for (int i = 0; i < nodes; i++)
	{
	  CallStackNode *node = get_node (i);
	  node->~CallStackNode ();
	}
      for (int i = 0; i < nchunks; i++)
	free (chunks[i]);
      free (chunks);
    }
  delete natpcsP;
  delete jpcsP;
  destroy_map (CallStackNode *, cstackMap);
}

CallStackNode *
CallStackP::new_Node (CallStackNode *anc, Histable *pcval)
{
  // cstackLock->aquireLock(); // Caller already locked it
  if (nodes >= nchunks * CHUNKSZ)
    {
      CallStackNode **old_chunks = chunks;
      nchunks++;

      // Reallocate Node chunk array
      chunks = (CallStackNode **) malloc (nchunks * sizeof (CallStackNode *));
      for (int i = 0; i < nchunks - 1; i++)
	chunks[i] = old_chunks[i];
      free (old_chunks);
      // Allocate new chunk for nodes.
      chunks[nchunks - 1] = (CallStackNode *) malloc (CHUNKSZ * sizeof (CallStackNode));
    }
  nodes++;
  CallStackNode *node = get_node (nodes - 1);
  new (node) CallStackNode (anc, pcval);
  // cstackLock->releaseLock();
  return node;
}

CallStackNode *
CallStackP::find_preg_stack (uint64_t prid)
{
  DataView *dview = experiment->getOpenMPdata ();
  dview->sort (PROP_CPRID);
  Datum tval;
  tval.setUINT64 (prid);
  long idx = dview->getIdxByVals (&tval, DataView::REL_EQ);
  if (idx < 0)
    return root;
  CallStackNode *node = (CallStackNode*) dview->getObjValue (PROP_USTACK, idx);
  if (node != NULL)
    return node;
  uint64_t pprid = dview->getLongValue (PROP_PPRID, idx);
  if (pprid == prid)
    return root;
  void *nat_stack = dview->getObjValue (PROP_MSTACK, idx);
  Vector<Histable*> *pcs = getStackPCs (nat_stack);

  // Find the bottom frame
  int btm;
  bool inOMP = false;
  DbeInstr *instr;
  Histable *hist;
  for (btm = 0; btm < pcs->size (); btm++)
    {
      hist = pcs->fetch (btm);
      if (hist->get_type () == Histable::INSTR)
	instr = (DbeInstr *) hist;
      else    // DBELINE
	instr = (DbeInstr *) hist->convertto (Histable::INSTR);
      LoadObject *lo = instr->func->module->loadobject;
      if (!inOMP)
	{
	  if (lo->flags & SEG_FLAG_OMP)
	    inOMP = true;
	}
      else if (!(lo->flags & SEG_FLAG_OMP))
	break;
    }

  // Find the top frame
  dview->sort (PROP_CPRID);
  int top;
  tval.setUINT64 (pprid);
  long pidx = dview->getIdxByVals (&tval, DataView::REL_EQ);
  if (pidx < 0)     // No parent. Process the entire nat_stack
    top = pcs->size () - 1;
  else
    {
      uint32_t thrid = (uint32_t) dview->getIntValue (PROP_THRID, idx);
      uint32_t pthrid = (uint32_t) dview->getIntValue (PROP_THRID, pidx);
      if (thrid != pthrid)
	{
	  // Parent is on a different stack.
	  // Process the entire nat_stack. Skip libthread.
	  for (top = pcs->size () - 1; top >= 0; top--)
	    {
	      hist = pcs->fetch (top);
	      if (hist->get_type () == Histable::INSTR)
		instr = (DbeInstr *) hist;
	      else // DBELINE
		instr = (DbeInstr *) hist->convertto (Histable::INSTR);
	      if (instr->func->module->loadobject->flags & SEG_FLAG_OMP)
		break;
	    }
	  if (top < 0)  // None found. May be incomplete call stack (x86)
	    top = pcs->size () - 1;
	}
      else
	{
	  // Parent is on the same stack. Find match.
	  top = pcs->size () - 1;
	  void *pnat_stack = dview->getObjValue (PROP_MSTACK, pidx);
	  Vector<Histable*> *ppcs = getStackPCs (pnat_stack);
	  for (int ptop = ppcs->size () - 1; top >= 0 && ptop >= 0;
		  top--, ptop--)
	    {
	      if (pcs->fetch (top) != ppcs->fetch (ptop))
		break;
	    }
	  delete ppcs;
	}
    }

  // Process the found range
  Vector<Histable*> *upcs = new Vector<Histable*>(128);
  for (int i = btm; i <= top; ++i)
    {
      hist = (DbeInstr*) pcs->fetch (i);
      if (hist->get_type () == Histable::INSTR)
	instr = (DbeInstr *) hist;
      else // DBELINE
	instr = (DbeInstr *) hist->convertto (Histable::INSTR);

      if (instr->func->module->loadobject->flags & SEG_FLAG_OMP)
	// Skip all frames from libmtsk
	continue;
      upcs->append (instr);
    }
  delete pcs;
  node = find_preg_stack (pprid);
  while (node != root)
    {
      upcs->append (node->instr);
      node = node->ancestor;
    }
  node = (CallStackNode *) add_stack (upcs);
  dview->setObjValue (PROP_USTACK, idx, node);
  delete upcs;
  return node;
}

#define JNI_MARKER -3

// This is one iteration if the third stage of
// resolve_frame_info + add_stack pipeline. Works on building the java
// stacks
void
CallStackP::add_stack_java (DataDescriptor *dDscr, long idx, FramePacket *frp,
			    hrtime_t tstamp, uint32_t thrid,
			    Vector<DbeInstr*>* natpcs, bool natpc_added,
			    cstk_ctx_chunk *cstCtxChunk)
{
  Vector<Histable*> *jpcs = NULL;
  cstk_ctx *cstctx = NULL;
  if (cstCtxChunk != NULL)
    {
      cstctx = cstCtxChunk->cstCtxAr[idx % CSTCTX_CHUNK_SZ];
      jpcs = cstctx->jpcs;
      jpcs->reset ();
    }
  if (jpcs == NULL)
    {
      // this is when we are not doing the pipeline optimization
      // Temporary array for resolved addresses
      // [leaf_pc .. root_pc] == [0..stack_size-1]
      // Leave room for a possible "truncated" frame
      if (jpcsP == NULL)
	jpcsP = new Vector<Histable*>;
      jpcs = jpcsP;
      jpcs->reset ();
    }

  //
  // Construct the user stack
  //
  // Construct Java user stack
  int jstack_size = frp->stackSize (true);
  if (jstack_size)
    {
      // jpcs = new Vector<Histable*>( jstack_size );
      if (frp->isTruncatedStack (true))
	{
	  Function *truncf = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc);
	  jpcs->append (truncf->find_dbeinstr (0, 0));
	}

      int nind = natpcs->size () - 1; // first native frame
      for (int jind = jstack_size - 1; jind >= 0; jind--)
	{
	  bool jleaf = (jind == 0); // is current java frame a leaf?
	  Vaddr mid = frp->getMthdFromStack (jind);
	  int bci = frp->getBciFromStack (jind);
	  DbeInstr *cur_instr = experiment->map_jmid_to_PC (mid, bci, tstamp);
	  jpcs->append (cur_instr);
	  if (bci == JNI_MARKER)
	    {
	      JMethod *j_method = (JMethod*) cur_instr->func;
	      // Find matching native function on the native stack
	      bool found = false;
	      for (; nind >= 0; nind--)
		{
		  DbeInstr *nat_addr = natpcs->fetch (nind);
		  if (0 == nat_addr)
		    continue;
		  Function *nat_func = nat_addr->func;
		  if (!found && j_method->jni_match (nat_func))
		    found = true;
		  if (found)
		    {
		      // XXX omazur: the following will skip JNI native method
		      // implemented in JVM itself.
		      // If we are back in JVM switch to processing Java
		      // frames if there are any.
		      if ((nat_func->module->loadobject->flags & SEG_FLAG_JVM) && !jleaf)
			break;
		      jpcs->append (nat_addr);
		    }
		}
	    }
	}
    }
  add_stack_java_epilogue (dDscr, idx, frp, tstamp, thrid, natpcs, jpcs, natpc_added);
}

// This is one iteration if the fourth stage of
// resolve_frame_info + add_stack pipeline.
// It adds the native and java stacks to the stackmap

void
CallStackP::add_stack_java_epilogue (DataDescriptor *dDscr, long idx, FramePacket *frp, hrtime_t tstamp, uint32_t thrid, Vector<DbeInstr*>* natpcs, Vector<Histable*> *jpcs, bool natpc_added)
{
  CallStackNode *node = NULL;
  if (!natpc_added)
    {
      node = (CallStackNode *) add_stack ((Vector<Histable*>*)natpcs);
      dDscr->setObjValue (PROP_MSTACK, idx, node);
      dDscr->setObjValue (PROP_XSTACK, idx, node);
      dDscr->setObjValue (PROP_USTACK, idx, node);
    }

  int jstack_size = frp->stackSize (true);
  if (jstack_size)
    {
      if (jpcs != NULL)
	node = (CallStackNode *) add_stack_d (jpcs);
      if (node == NULL)
	node = (CallStackNode*) dDscr->getObjValue (PROP_USTACK, idx);
      dDscr->setObjValue (PROP_USTACK, idx, node);
      Function *func = (Function*) node->instr->convertto (Histable::FUNCTION);
      if (func != dbeSession->get_JUnknown_Function ())
	dDscr->setObjValue (PROP_XSTACK, idx, node);
    }

  JThread *jthread = experiment->map_pckt_to_Jthread (thrid, tstamp);
  if (jthread == JTHREAD_NONE && jstack_size != 0 && node != NULL)
    {
      Function *func = (Function*) node->instr->convertto (Histable::FUNCTION);
      if (func != dbeSession->get_JUnknown_Function ())
	jthread = JTHREAD_DEFAULT;
    }
  dDscr->setObjValue (PROP_JTHREAD, idx, jthread);
  if (jthread == JTHREAD_NONE || (jthread != JTHREAD_DEFAULT && jthread->is_system ()))
    {
      if (jvm_node == NULL)
	{
	  Function *jvm = dbeSession->get_jvm_Function ();
	  if (jvm)
	    {
	      jvm_node = new_Node (root, jvm->find_dbeinstr (0, 0));
	      CommonPacket::jvm_overhead = jvm_node;
	    }
	}
      dDscr->setObjValue (PROP_USTACK, idx, jvm_node);
    }
}

// This is one iteration of the 2nd stage of
// resolve_frame_info + add_stack() pipeline. Builds the stack for a given framepacket.
// When pipeline optimization is turnd off, cstctxchunk passed is NULL
void
CallStackP::add_stack (DataDescriptor *dDscr, long idx, FramePacket *frp,
		       cstk_ctx_chunk* cstCtxChunk)
{
  Vector<DbeInstr*> *natpcs = NULL;
  cstk_ctx *cstctx = NULL;
  int stack_size = frp->stackSize ();
  if (cstCtxChunk != NULL)
    {
      cstctx = cstCtxChunk->cstCtxAr[idx % CSTCTX_CHUNK_SZ];
      natpcs = cstctx->natpcs;
      natpcs->reset ();
    }
  if (natpcs == NULL)
    {
      // this is when we are not doing the pipeline optimization
      // Temporary array for resolved addresses
      // [leaf_pc .. root_pc] == [0..stack_size-1]
      // Leave room for a possible "truncated" frame
      if (natpcsP == NULL)
	natpcsP = new Vector<DbeInstr*>;
      natpcs = natpcsP;
      natpcs->reset ();
    }

  bool leaf = true;
  hrtime_t tstamp = (hrtime_t) dDscr->getLongValue (PROP_TSTAMP, idx);
  uint32_t thrid = (uint32_t) dDscr->getIntValue (PROP_THRID, idx);

  enum
  {
    NONE,
    CHECK_O7,
    USE_O7,
    SKIP_O7
  } state = NONE;

  Vaddr o7_to_skip = 0;
  for (int index = 0; index < stack_size; index++)
    {
      if (frp->isLeafMark (index))
	{
	  state = CHECK_O7;
	  continue;
	}

      if (state == SKIP_O7)
	{
	  // remember this bad o7 value since OMP might not recognize it
	  o7_to_skip = frp->getFromStack (index);
	  state = NONE;
	  continue;
	}

      Vaddr va = frp->getFromStack (index);
      DbeInstr *cur_instr = experiment->map_Vaddr_to_PC (va, tstamp);
#if ARCH(Intel)// TBR? FIXUP_XXX_SPARC_LINUX: switch should be on experiment ARCH, not dbe ARCH
      // We need to adjust return addresses on intel
      // in order to attribute inclusive metrics to
      // proper call instructions.
      if (experiment->exp_maj_version <= 9)
	if (!leaf && cur_instr->addr != 0)
	  cur_instr = cur_instr->func->find_dbeinstr (0, cur_instr->addr - 1);
#endif

      // Skip PC's from PLT, update leaf and state accordingly
      if ((cur_instr->func->flags & FUNC_FLAG_PLT)
	   && (leaf || state == CHECK_O7))
	{
	  if (state == CHECK_O7)
	    state = USE_O7;
	  leaf = false;
	  continue;
	}
      if (state == CHECK_O7)
	{
	  state = USE_O7;
	  uint64_t saddr = cur_instr->func->save_addr;
	  if (cur_instr->func->isOutlineFunction)
	    // outline functions assume 'save' instruction
	    // Note: they accidentally have saddr == FUNC_ROOT
	    state = SKIP_O7;
	  else if (saddr == FUNC_ROOT)
	    {
	      // If a function is statically determined as a root
	      // but dynamically appears not, don't discard o7.
	      // One such case is __misalign_trap_handler on sparcv9.
	      if (stack_size == 3)
		state = SKIP_O7;
	    }
	  else if (saddr != FUNC_NO_SAVE && cur_instr->addr > saddr)
	    state = SKIP_O7;
	}
      else if (state == USE_O7)
	{
	  state = NONE;
	  if (cur_instr->flags & PCInvlFlag)
	    continue;
	}
      if (leaf)
	{
	  Vaddr evpc = (Vaddr) dDscr->getLongValue (PROP_VIRTPC, idx);
	  if (evpc != 0
	      && !(index > 0 && frp->isLeafMark (index - 1)
		   && evpc == (Vaddr) (-1)))
	    {
	      /* contains hwcprof info */
	      cur_instr->func->module->read_hwcprof_info ();

	      // complete ABS validation of candidate eventPC/eventEA
	      // and correction/adjustment of collected callstack leaf PC
	      DbeInstr *candPC = experiment->map_Vaddr_to_PC (evpc, tstamp);
	      Vaddr vaddr = (Vaddr) dDscr->getLongValue (PROP_VADDR, idx);
	      Vaddr tmp_vaddr = vaddr;
	      int abst_type;
	      uint32_t tag = dDscr->getIntValue (PROP_HWCTAG, idx);
	      if (tag < 0 || tag >= MAX_HWCOUNT)
		abst_type = ABST_NOPC;
	      else
		abst_type = experiment->coll_params.hw_tpc[tag];

	      // We need to adjust addresses for ABST_EXACT_PEBS_PLUS1
	      // (Nehalem/SandyBridge PEBS identifies PC+1, not PC)
	      if (abst_type == ABST_EXACT_PEBS_PLUS1 && candPC->addr != 0)
		candPC = candPC->func->find_dbeinstr (0, candPC->func->find_previous_addr (candPC->addr));

	      cur_instr = adjustEvent (cur_instr, candPC, tmp_vaddr, abst_type);
	      if (vaddr != tmp_vaddr)
		{
		  if (tmp_vaddr < ABS_CODE_RANGE)
		    {
		      /* post processing backtrack failed */
		      dDscr->setValue (PROP_VADDR, idx, tmp_vaddr);
		      dDscr->setValue (PROP_PADDR, idx, ABS_NULL);
		      /* hwcp->eventVPC =  xxxxx leave eventPC alone,
		       *   or can we set it to leafpc? */
		      dDscr->setValue (PROP_PHYSPC, idx, ABS_NULL);
		    }
		  else
		    {
		      /* internal error: why would post-processing modify vaddr? */
		      dDscr->setValue (PROP_PADDR, idx, (Vaddr) (-1));
		      dDscr->setValue (PROP_PHYSPC, idx, (Vaddr) (-1));
		    }
		}
	    }
	}
      natpcs->append (cur_instr);
      leaf = false;

      // A hack to deceive the user into believing that outlined code
      // is called from the base function
      DbeInstr *drvd = cur_instr->func->derivedNode;
      if (drvd != NULL)
	natpcs->append (drvd);
    }
  if (frp->isTruncatedStack ())
    {
      Function *truncf = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc);
      natpcs->append (truncf->find_dbeinstr (0, 0));
    }
  else if (frp->isFailedUnwindStack ())
    {
      Function *funwf = dbeSession->getSpecialFunction (DbeSession::FailedUnwindFunc);
      natpcs->append (funwf->find_dbeinstr (0, 0));
    }

  CallStackNode *node = (CallStackNode*) add_stack ((Vector<Histable*>*)natpcs);
  dDscr->setObjValue (PROP_MSTACK, idx, node);
  dDscr->setObjValue (PROP_XSTACK, idx, node);
  dDscr->setObjValue (PROP_USTACK, idx, node);

  // OpenMP 3.0 stacks
  stack_size = frp->ompstack->size ();
  if (stack_size > 0 || frp->omp_state == OMP_IDLE_STATE)
    {
      Function *func;
      Vector<Histable*> *omppcs = new Vector<Histable*>(stack_size);
      Vector<Histable*> *ompxpcs = new Vector<Histable*>(stack_size);
      switch (frp->omp_state)
	{
	case OMP_IDLE_STATE:
	case OMP_RDUC_STATE:
	case OMP_IBAR_STATE:
	case OMP_EBAR_STATE:
	case OMP_LKWT_STATE:
	case OMP_CTWT_STATE:
	case OMP_ODWT_STATE:
	case OMP_ATWT_STATE:
	  {
	    func = dbeSession->get_OMP_Function (frp->omp_state);
	    DbeInstr *instr = func->find_dbeinstr (0, 0);
	    omppcs->append (instr);
	    ompxpcs->append (instr);
	    break;
	  }
	}
      Vector<Vaddr> *stck = frp->ompstack;
      leaf = true;
      for (int index = 0; index < stack_size; index++)
	{
	  if (stck->fetch (index) == SP_LEAF_CHECK_MARKER)
	    {
	      state = CHECK_O7;
	      continue;
	    }
	  if (state == SKIP_O7)
	    {
	      state = NONE;
	      continue;
	    }

	  // The OMP stack might not have enough information to know to discard a bad o7.
	  // So just remember what the native stack skipped.
	  if (o7_to_skip == stck->fetch (index))
	    {
	      state = NONE;
	      continue;
	    }
	  Vaddr va = stck->fetch (index);
	  DbeInstr *cur_instr = experiment->map_Vaddr_to_PC (va, tstamp);

	  // Skip PC's from PLT, update leaf and state accordingly
	  if ((cur_instr->func->flags & FUNC_FLAG_PLT) &&
	      (leaf || state == CHECK_O7))
	    {
	      if (state == CHECK_O7)
		state = USE_O7;
	      leaf = false;
	      continue;
	    }
	  if (state == CHECK_O7)
	    {
	      state = USE_O7;
	      uint64_t saddr = cur_instr->func->save_addr;
	      if (cur_instr->func->isOutlineFunction)
		// outline functions assume 'save' instruction
		// Note: they accidentally have saddr == FUNC_ROOT
		state = SKIP_O7;
	      else if (saddr == FUNC_ROOT)
		{
		  // If a function is statically determined as a root
		  // but dynamically appears not, don't discard o7.
		  // One such case is __misalign_trap_handler on sparcv9.
		  if (stack_size == 3)
		    state = SKIP_O7;
		}
	      else if (saddr != FUNC_NO_SAVE && cur_instr->addr > saddr)
		state = SKIP_O7;
	    }
	  else if (state == USE_O7)
	    {
	      state = NONE;
	      if (cur_instr->flags & PCInvlFlag)
		continue;
	    }

	  DbeLine *dbeline = (DbeLine*) cur_instr->convertto (Histable::LINE);
	  if (cur_instr->func->usrfunc)
	    {
	      dbeline = dbeline->sourceFile->find_dbeline (cur_instr->func->usrfunc, dbeline->lineno);
	      omppcs->append (dbeline);
	    }
	  else if (dbeline->lineno > 0)
	    omppcs->append (dbeline);
	  else
	    omppcs->append (cur_instr);
	  if (dbeline->is_set (DbeLine::OMPPRAGMA) &&
	      frp->omp_state == OMP_WORK_STATE)
	    dDscr->setValue (PROP_OMPSTATE, idx, OMP_OVHD_STATE);
	  ompxpcs->append (cur_instr);
	  leaf = false;
	}
      if (frp->omptruncated == SP_TRUNC_STACK_MARKER)
	{
	  func = dbeSession->getSpecialFunction (DbeSession::TruncatedStackFunc);
	  DbeInstr *instr = func->find_dbeinstr (0, 0);
	  omppcs->append (instr);
	  ompxpcs->append (instr);
	}
      else if (frp->omptruncated == SP_FAILED_UNWIND_MARKER)
	{
	  func = dbeSession->getSpecialFunction (DbeSession::FailedUnwindFunc);
	  DbeInstr *instr = func->find_dbeinstr (0, 0);
	  omppcs->append (instr);
	  ompxpcs->append (instr);
	}

      // User model call stack
      node = (CallStackNode*) add_stack (omppcs);
      dDscr->setObjValue (PROP_USTACK, idx, node);
      delete omppcs;

      // Expert call stack
      node = (CallStackNode*) add_stack (ompxpcs);
      dDscr->setObjValue (PROP_XSTACK, idx, node);
      delete ompxpcs;
      dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT);
      return;
    }

  // OpenMP 2.5 stacks
  if (frp->omp_cprid || frp->omp_state)
    {
      DataView *dview = experiment->getOpenMPdata ();
      if (dview == NULL)
	{
	  // It appears we may get OMP_SERL_STATE from a passive libmtsk
	  dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT);
	  return;
	}
      if (dview->getDataDescriptor () == dDscr)
	{
	  // Don't process the user stack for OpenMP fork events yet
	  dDscr->setObjValue (PROP_USTACK, idx, (void*) NULL);
	  dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT);
	  return;
	}
      Vector<Histable*> *omppcs = new Vector<Histable*>(stack_size);

      // Construct OMP user stack
      // Find the bottom frame
      int btm = 0;
      switch (frp->omp_state)
	{
	case OMP_IDLE_STATE:
	  {
	    Function *func = dbeSession->get_OMP_Function (frp->omp_state);
	    omppcs->append (func->find_dbeinstr (0, 0));
	    // XXX: workaround for inconsistency between OMP_IDLE_STATE
	    // and omp_cprid != 0
	    frp->omp_cprid = 0;
	    btm = natpcs->size ();
	    break;
	  }
	case OMP_RDUC_STATE:
	case OMP_IBAR_STATE:
	case OMP_EBAR_STATE:
	case OMP_LKWT_STATE:
	case OMP_CTWT_STATE:
	case OMP_ODWT_STATE:
	case OMP_ATWT_STATE:
	  {
	    Function *func = dbeSession->get_OMP_Function (frp->omp_state);
	    omppcs->append (func->find_dbeinstr (0, 0));
	    bool inOMP = false;
	    for (btm = 0; btm < natpcs->size (); btm++)
	      {
		LoadObject *lo = natpcs->fetch (btm)->func->module->loadobject;
		if (!inOMP)
		  {
		    if (lo->flags & SEG_FLAG_OMP)
		      inOMP = true;
		  }
		else if (!(lo->flags & SEG_FLAG_OMP))
		  break;
	      }
	    break;
	  }
	case OMP_NO_STATE:
	case OMP_WORK_STATE:
	case OMP_SERL_STATE:
	default:
	  break;
	}

      // Find the top frame
      int top = -1;
      switch (frp->omp_state)
	{
	case OMP_IDLE_STATE:
	  break;
	default:
	  {
	    dview->sort (PROP_CPRID);
	    Datum tval;
	    tval.setUINT64 (frp->omp_cprid);
	    long pidx = dview->getIdxByVals (&tval, DataView::REL_EQ);
	    if (pidx < 0)   // No parent. Process the entire nat_stack
	      top = natpcs->size () - 1;
	    else
	      {
		uint32_t pthrid = (uint32_t) dview->getIntValue (PROP_THRID, pidx);
		if (thrid != pthrid)
		  {
		    // Parent is on a different stack.
		    // Process the entire nat_stack. Skip libthread.
		    for (top = natpcs->size () - 1; top >= 0; top--)
		      {
			DbeInstr *instr = natpcs->fetch (top);
			if (instr->func->module->loadobject->flags & SEG_FLAG_OMP)
			  break;
		      }
		    if (top < 0) // None found. May be incomplete call stack
		      top = natpcs->size () - 1;
		  }
		else
		  {
		    // Parent is on the same stack. Find match.
		    top = natpcs->size () - 1;
		    void *pnat_stack = dview->getObjValue (PROP_MSTACK, pidx);
		    Vector<Histable*> *ppcs = getStackPCs (pnat_stack);
		    for (int ptop = ppcs->size () - 1; top >= 0 && ptop >= 0;
			    top--, ptop--)
		      {
			if (natpcs->fetch (top) != ppcs->fetch (ptop))
			  break;
		      }
		    delete ppcs;
		  }
	      }
	    // If no frames are found for Barrier/Reduction save at least one
	    if ((frp->omp_state == OMP_RDUC_STATE
		 || frp->omp_state == OMP_IBAR_STATE
		 || frp->omp_state == OMP_EBAR_STATE)
		&& top < btm && btm < natpcs->size ())
	      top = btm;
	  }
	}
      for (int i = btm; i <= top; ++i)
	{
	  DbeInstr *instr = natpcs->fetch (i);
	  if (instr->func->module->loadobject->flags & SEG_FLAG_OMP)
	    continue; // Skip all frames from libmtsk
	  omppcs->append (instr);
	}
      node = find_preg_stack (frp->omp_cprid);
      while (node != root)
	{
	  omppcs->append (node->instr);
	  node = node->ancestor;
	}
      node = (CallStackNode *) add_stack (omppcs);
      dDscr->setObjValue (PROP_USTACK, idx, node);
      delete omppcs;
      dDscr->setObjValue (PROP_JTHREAD, idx, JTHREAD_DEFAULT);
      return;
    }

  // Construct Java user stack
  add_stack_java (dDscr, idx, frp, tstamp, thrid, natpcs, true, NULL);
}

// adjustment of leafPC/eventVA for XHWC packets with candidate eventPC
//  Called from CallStack during initial processing of the events
DbeInstr *
CallStackP::adjustEvent (DbeInstr *leafPC, DbeInstr *candPC, Vaddr &eventVA,
			 int abst_type)
{
  // increment counter of dataspace events
  experiment->dsevents++;
  bool isPrecise;
  if (abst_type == ABST_EXACT_PEBS_PLUS1)
    isPrecise = true;
  else if (abst_type == ABST_EXACT)
    isPrecise = true;
  else
    isPrecise = false;

  if (isPrecise)
    /* precise backtracking */
    /* assume within 1 instruction of leaf (this could be checked here) */
    // no change to eventVA or candPC
    return candPC;

  Function *func = leafPC->func;
  unsigned int bt_entries = func->module->bTargets.size ();
  DbeInstr *bestPC = NULL;

  // bt == branch target (potential destination of a branch
  if (bt_entries == 0)
    { // no XHWCprof info for this module
      // increment counter
      experiment->dsnoxhwcevents++;

      // see if event is to be processed anyway
      if (!dbeSession->check_ignore_no_xhwcprof ())
	{
	  // Don't ignore error
	  // XXX -- set error code in event VA -- replace with other mechanism
	  if (eventVA > ABS_CODE_RANGE)
	    eventVA = ABS_NULL;
	  eventVA |= ABS_NO_CTI_INFO; // => effective address can't be validated
	  bestPC = leafPC; // => no PC correction possible
	}
      else
	bestPC = candPC; // assume the event valid
    }
  else
    {
      // we have the info to verify the backtracking
      target_info_t *bt;
      int bt_entry = bt_entries;
      uint64_t leafPC_offset = func->img_offset + leafPC->addr;
      uint64_t candPC_offset = candPC->func->img_offset + candPC->addr;
      do
	{
	  bt_entry--;
	  bt = func->module->bTargets.fetch (bt_entry);
	  /* bts seem to be sorted by offset, smallest to largest */
	}
      while (bt_entry > 0 && bt->offset > leafPC_offset);
      /* if bt_entry == 0, all items have been checked */

      if (bt->offset > leafPC_offset)
	{ /* XXXX isn't is possible that all bt's are after leafPC_offset? */
	  bestPC = leafPC; // actual event PC can't be determined
	  if (eventVA > ABS_CODE_RANGE)
	    eventVA = ABS_NULL;
	  eventVA |= ABS_INFO_FAILED; // effective address can't be validated
	}
      else if (bt->offset > candPC_offset)
	{
	  // use synthetic PC corresponding to bTarget
	  bestPC = func->find_dbeinstr (PCTrgtFlag, bt->offset - func->img_offset);
	  if (eventVA > ABS_CODE_RANGE)
	    eventVA = ABS_NULL;
	  eventVA |= ABS_CTI_TARGET; // effective  address can't be validated
	}
      else
	bestPC = candPC;    // accept provided virtual address as valid
    }
  return bestPC;
}

void *
CallStackP::add_stack_d (Vector<Histable*> *objs)
{
  // objs: root..leaf
  // Reverse objs
  for (int i = 0, j = objs->size () - 1; i < j; ++i, --j)
    objs->swap (i, j);
  return add_stack (objs);
}

CallStackNode::CallStackNode (CallStackNode *_ancestor, Histable *_instr)
{
  ancestor = _ancestor;
  instr = _instr;
  alt_node = NULL;
}

CallStackNode::~CallStackNode () { }

bool
CallStackNode::compare (long start, long end, Vector<Histable*> *objs, CallStackNode *mRoot)
{
  CallStackNode *p = this;
  for (long i = start; i < end; i++, p = p->get_ancestor ())
    if (p == NULL || p->get_instr () != objs->get (i))
      return false;
  return p == mRoot;
}

void
CallStackNode::dump ()
{
  const char *s = "";
  int sz = 0;
  for (CallStackNode *p = this; p; p = p->get_ancestor ())
    {
      fprintf (stderr, NTXT ("%.*s 0x%08llx id=0x%08llx %s\n"), sz, s,
	       (long long) p, (long long) p->get_instr ()->id,
	       STR (p->get_instr ()->get_name ()));
      s = "-";
      sz += 1;
    }
}

long total_calls_add_stack, total_stacks, total_nodes, call_stack_size[201];

void *
CallStackP::add_stack (Vector<Histable*> *objs)
{
  // objs: leaf..root
  uint64_t hash = objs->size ();
  for (long i = objs->size () - 1; i >= 0; --i)
    hash ^= (unsigned long long) objs->get (i);

  uint64_t key = hash ? hash : 1;
  CallStackNode *node = cstackMap->get (key);
#ifdef DEBUG
  if (DUMP_CALL_STACK)
    {
      total_calls_add_stack++;
      call_stack_size[objs->size () > 200 ? 200 : objs->size ()]++;
      Dprintf (DUMP_CALL_STACK,
	       "add_stack: %lld size=%lld  key=0x%08llx cashNode=0x%08llx\n",
	       (long long) total_calls_add_stack, (long long) objs->size (),
	       (long long) key, (long long) node);
      for (long i = 0, sz = VecSize (objs); i < sz; i++)
	Dprintf (DUMP_CALL_STACK, "  add_stack: %.*s 0x%08llx id=0x%08llx %s\n",
		 (int) i, NTXT (" "), (long long) objs->get (i),
		 (long long) objs->get (i)->id, STR (objs->get (i)->get_name ()));
    }
#endif
  if (node && node->compare (0, objs->size (), objs, root))
    {
      Dprintf (DUMP_CALL_STACK, NTXT ("STACK FOUND: key=0x%08llx 0x%08llx id=0x%08llx %s\n"),
	       (long long) key, (long long) node,
	       (long long) node->get_instr ()->id,
	       STR (node->get_instr ()->get_name ()));
      return node;
    }
  node = root;
  for (long i = objs->size () - 1; i >= 0; i--)
    {
      Histable *instr = objs->get (i);
      int old_count = node->count;
      int left;
      CallStackNode *nd = node->find (instr, &left);
      if (nd)
	{
	  node = nd;
	  continue;
	}
      cstackLock->aquireLock (); // Use one lock for all nodes
      // node->aquireLock();
      if (old_count != node->count)
	{
	  nd = node->find (instr, &left);
	  if (nd)
	    { // the other thread has created this node
	      cstackLock->releaseLock ();
	      // node->releaseLock();
	      node = nd;
	      continue;
	    }
	}
      // New Call Stack
      total_stacks++;
      nd = node;
      CallStackNode *first = NULL;
      do
	{
	  CallStackNode *anc = node;
	  total_nodes++;
	  node = new_Node (anc, objs->get (i));
	  if (first)
	    anc->append (node);
	  else
	    first = node;
	}
      while (i-- > 0);
      nd->insert (left, first);
      cstackLock->releaseLock ();
      // nd->releaseLock();
      break;
    }
  cstackMap->put (key, node);
  if (DUMP_CALL_STACK)
    node->dump ();
  return node;
}

CallStackNode *
CallStackP::get_node (int n)
{
  if (n < nodes)
    return &chunks[n / CHUNKSZ][n % CHUNKSZ];
  return NULL;
}

/*
 *  Debugging methods
 */
void
CallStackP::print (FILE *fd)
{
  FILE *f = (fd == NULL ? stderr : fd);
  fprintf (f, GTXT ("CallStack: nodes = %d\n\n"), nodes);
  int maxdepth = 0;
  int maxwidth = 0;
  const char *t;
  char *n;
  for (int i = 0; i < nodes; i++)
    {
      CallStackNode *node = &chunks[i / CHUNKSZ][i % CHUNKSZ];
      Histable *instr = node->instr;
      if (instr->get_type () == Histable::LINE)
	{
	  t = "L";
	  n = ((DbeLine *) instr)->func->get_name ();
	}
      else if (instr->get_type () == Histable::INSTR)
	{
	  t = "I";
	  n = ((DbeInstr *) instr)->func->get_name ();
	}
      else
	{
	  t = "O";
	  n = instr->get_name ();
	}
      long long addr = (long long) instr->get_addr ();
      fprintf (f, GTXT ("node: 0x%016llx anc: 0x%016llx -- 0x%016llX:  %s %s\n"),
	       (unsigned long long) node, (unsigned long long) node->ancestor,
	       addr, t, n);
    }
  fprintf (f, GTXT ("md = %d, mw = %d\n"), maxdepth, maxwidth);
}

/*
 *  Static CallStack methods
 */
CallStack *
CallStack::getInstance (Experiment *exp)
{
  return new CallStackP (exp);
}

int
CallStack::stackSize (void *stack)
{
  CallStackNode *node = (CallStackNode *) stack;
  int sz = 0;
  for (; node; node = node->ancestor)
    sz++;
  return sz - 1; // don't count the root node
}

Histable *
CallStack::getStackPC (void *stack, int n)
{
  CallStackNode *node = (CallStackNode *) stack;
  while (n-- && node)
    node = node->ancestor;
  if (node == NULL)
    return dbeSession->get_Unknown_Function ()->find_dbeinstr (PCInvlFlag, 0);
  return node->instr;
}

Vector<Histable*> *
CallStack::getStackPCs (void *stack, bool get_hide_stack)
{
  Vector<Histable*> *res = new Vector<Histable*>;
  CallStackNode *node = (CallStackNode *) stack;
  if (get_hide_stack && node->alt_node != NULL)
    node = node->alt_node;
  while (node && node->ancestor)
    { // skip the root node
      res->append (node->instr);
      node = node->ancestor;
    }
  return res;
}

int
CallStack::compare (void *stack1, void *stack2)
{
  // Quick comparision
  if (stack1 == stack2)
    return 0;

  CallStackNode *node1 = (CallStackNode *) stack1;
  CallStackNode *node2 = (CallStackNode *) stack2;
  while (node1 != NULL && node2 != NULL)
    {
      //to keep the result const on different platforms
      //we use instr->id instead of instr
      if (node1->instr->id < node2->instr->id)
	return -1;
      else if (node1->instr->id > node2->instr->id)
	return 1;
      node1 = node1->ancestor;
      node2 = node2->ancestor;
    }
  if (node1 == NULL && node2 != NULL)
    return -1;
  else if (node1 != NULL && node2 == NULL)
    return 1;
  else
    return 0;
}

// LIBRARY VISIBILITY

void
CallStack::setHideStack (void *stack, void *hideStack)
{
  CallStackNode *hNode = (CallStackNode *) stack;
  hNode->alt_node = (CallStackNode *) hideStack;
}