/* Copyright (C) 2021-2023 Free Software Foundation, Inc. Contributed by Oracle. This file is part of GNU Binutils. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #include #include #include #include #include #include #include "gp-defs.h" #include "collector.h" #include "gp-experiment.h" #include "memmgr.h" #include "tsd.h" /* Get dynamic module interface*/ #include "collector_module.h" /* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */ #include "data_pckts.h" #if ARCH(SPARC) struct frame { long fr_local[8]; /* saved locals */ long fr_arg[6]; /* saved arguments [0 - 5] */ struct frame *fr_savfp; /* saved frame pointer */ long fr_savpc; /* saved program counter */ #if WSIZE(32) char *fr_stret; /* struct return addr */ #endif long fr_argd[6]; /* arg dump area */ long fr_argx[1]; /* array of args past the sixth */ }; #elif ARCH(Intel) struct frame { unsigned long fr_savfp; unsigned long fr_savpc; }; #endif /* Set the debug trace level */ #define DBG_LT0 0 #define DBG_LT1 1 #define DBG_LT2 2 #define DBG_LT3 3 int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL; #define VM_NO_ACCESS (-1) #define VM_NOT_VM_MEMORY (-2) #define VM_NOT_X_SEGMENT (-3) #define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end)) /* * Weed through all the arch dependent stuff to get the right definition * for 'pc' in the ucontext structure. 
The system header files are a mess
 * dealing with all the arch (just look for PC, R_PC, REG_PC).
 *
 */

#if ARCH(SPARC)

/* True when x lies in [barrier_hdl, barrier_hdlx).  Always false while
   barrier_hdl is 0.  */
#define IN_BARRIER(x) \
        ( barrier_hdl && \
          (unsigned long)x >= barrier_hdl && \
          (unsigned long)x < barrier_hdlx )
static unsigned long barrier_hdl = 0;
static unsigned long barrier_hdlx = 0;

#if WSIZE(64)
#define STACK_BIAS 2047
/* True when x lies in [misalign_hdl, misalign_hdlx).  Always false while
   misalign_hdl is 0.  Only defined for 64-bit SPARC.  */
#define IN_TRAP_HANDLER(x) \
        ( misalign_hdl && \
          (unsigned long)x >= misalign_hdl && \
          (unsigned long)x < misalign_hdlx )
static unsigned long misalign_hdl = 0;
static unsigned long misalign_hdlx = 0;
#elif WSIZE(32)
#define STACK_BIAS 0
#endif

/* Accessors for the general registers stored in a ucontext_t.  The 64-bit
   and 32-bit layouts name the register array and its indices differently.  */
#if WSIZE(64)
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
#else
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
#endif

#elif ARCH(Intel)
#include "opcodes/disassemble.h"

/* printf-style callback that ignores all of its arguments and returns 0.
   NOTE(review): presumably installed as the disassembler's output hook so
   that no text is produced -- the installation site is not in this part
   of the file.  */
static int
fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}

/* Same as fprintf_func, with an additional (ignored) style argument.  */
static int
fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
                     enum disassembler_style st ATTRIBUTE_UNUSED,
                     const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}

/* Get LENGTH bytes from info's buffer, at target address memaddr.
   Transfer them to myaddr.
*/ static int read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length, disassemble_info *info) { unsigned int opb = info->octets_per_byte; size_t end_addr_offset = length / opb; size_t max_addr_offset = info->buffer_length / opb; size_t octets = (memaddr - info->buffer_vma) * opb; if (memaddr < info->buffer_vma || memaddr - info->buffer_vma > max_addr_offset || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset || (info->stop_vma && (memaddr >= info->stop_vma || memaddr + end_addr_offset > info->stop_vma))) return -1; memcpy (myaddr, info->buffer + octets, length); return 0; } static void print_address_func (bfd_vma addr ATTRIBUTE_UNUSED, disassemble_info *info ATTRIBUTE_UNUSED) { } static asymbol * symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED, disassemble_info *info ATTRIBUTE_UNUSED) { return NULL; } static bfd_boolean symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED, disassemble_info *info ATTRIBUTE_UNUSED) { return TRUE; } static void memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED, disassemble_info *info ATTRIBUTE_UNUSED) { } #if WSIZE(32) #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP]) #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP]) #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP]) #elif WSIZE(64) #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP]) #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP]) #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP]) #endif /* WSIZE() */ #elif ARCH(Aarch64) #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15]) #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13]) #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14]) #endif /* ARCH() */ /* * FILL_CONTEXT() for all platforms * Could use getcontext() except: * - it's not guaranteed to be async signal safe * - it's a system call and not that lightweight * - it's not portable as 
of POSIX.1-2008
 * So we just use low-level mechanisms to fill in the few fields we need.
 */
#if ARCH(SPARC)
#if WSIZE(32)
/* Stores the caller's %i6 as the context's stack pointer and 0 as its pc,
   after issuing "ta 3".  */
#define FILL_CONTEXT(context) \
    { \
        greg_t fp; \
        __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
        __asm__ __volatile__( "ta 3" ); \
        GET_SP(context) = fp; \
        GET_PC(context) = (greg_t)0; \
    }

#elif WSIZE(64)
/* Same as the 32-bit variant, but issues "flushw" instead of "ta 3".  */
#define FILL_CONTEXT(context) \
    { \
        greg_t fp; \
        __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
        __asm__ __volatile__( "flushw" ); \
        GET_SP(context) = fp; \
        GET_PC(context) = (greg_t)0; \
    }
#endif /* WSIZE() */

#elif ARCH(Intel)
/* Fills sp, fp and pc from the __collector_get{sp,fp,pc} helpers, clears
   uc_link, and records the stack as starting at sp with a fixed size of
   0x100000 bytes.  */
#define FILL_CONTEXT(context) \
    { \
        context->uc_link = NULL; \
        void *sp = __collector_getsp(); \
        GET_SP(context) = (intptr_t)sp; \
        GET_FP(context) = (intptr_t)__collector_getfp(); \
        GET_PC(context) = (intptr_t)__collector_getpc(); \
        context->uc_stack.ss_sp = sp; \
        context->uc_stack.ss_size = 0x100000; \
    }

#elif ARCH(Aarch64)
/* Uses getcontext() and then overrides the saved sp with the current
   frame address.  */
#define FILL_CONTEXT(context) \
    { CALL_UTIL (getcontext) (context); \
      context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
    }

#endif /* ARCH() */

/* Return the byte at P.  When __collector_VM_ReadByteInstruction is set it
   is asked first; its answer is returned unless it is VM_NOT_VM_MEMORY, in
   which case (or when the hook is unset) P is dereferenced directly.  */
static int
getByteInstruction (unsigned char *p)
{
  if (__collector_VM_ReadByteInstruction)
    {
      int v = __collector_VM_ReadByteInstruction (p);
      if (v != VM_NOT_VM_MEMORY)
        return v;
    }
  return *p;
}

/* Handle created by __collector_ext_unwind_init when recording is on.  */
struct DataHandle *dhndl = NULL;

/* Key of the per-thread slot set up by __collector_ext_unwind_key_init.  */
static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;

/* To support two OpenMP API's we use a pointer
 * to the actual function.
*/ int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL; int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL; #define DEFAULT_MAX_NFRAMES 256 static int max_native_nframes = DEFAULT_MAX_NFRAMES; static int max_java_nframes = DEFAULT_MAX_NFRAMES; #define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) ) #define JAVA_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) * 2 + 16 ) #define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) ) #define ROOT_UID 801425552975190205ULL #define ROOT_UID_INV 92251691606677ULL #define ROOT_IDX 13907816567264074199ULL #define ROOT_IDX_INV 2075111ULL #define UIDTableSize 1048576 static volatile uint64_t *UIDTable = NULL; static volatile int seen_omp = 0; static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode); static FrameInfo compute_uid (Frame_packet *frp); static int omp_no_walk = 0; #if ARCH(Intel) #define ValTableSize 1048576 #define OmpValTableSize 65536 static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs static struct WalkContext *OmpCurCtxs = NULL; static struct WalkContext *OmpCtxs = NULL; static uint32_t *OmpVals = NULL; static unsigned long *OmpRAs = NULL; static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend); static int parse_x86_AVX_instruction (unsigned char *pc); struct WalkContext { unsigned long pc; unsigned long sp; unsigned long fp; unsigned long ln; unsigned long sbase; /* stack boundary */ unsigned long tbgn; /* current memory segment start */ unsigned long tend; /* current memory segment end */ }; #endif #if defined(DEBUG) && ARCH(Intel) #include static void dump_stack (int nline) { if ((__collector_tracelevel & SP_DUMP_STACK) == 0) return; enum Constexpr { MAX_SIZE = 1024 }; void *array[MAX_SIZE]; size_t sz = backtrace (array, MAX_SIZE); char **strings = backtrace_symbols 
(array, sz); DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz); for (int i = 0; i < sz; i++) DprintfT (SP_DUMP_STACK, " %3d: %p %s\n", i, array[i], strings[i] ? strings[i] : "???"); } #define dump_targets(nline, ntrg, targets) \ if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \ for(int i = 0; i < ntrg; i++) \ DprintfT (SP_DUMP_UNWIND, " %2d: 0x%lx\n", i, (long) targets[i]) #else #define dump_stack(x) #define dump_targets(nline, ntrg, targets) #endif void __collector_ext_unwind_key_init (int isPthread, void * stack) { void * ptr = __collector_tsd_get_by_key (unwind_key); if (ptr == NULL) { TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n"); return; } if (isPthread) { size_t stack_size = 0; void *stack_addr = 0; pthread_t pthread = pthread_self (); pthread_attr_t attr; int err = pthread_getattr_np (pthread, &attr); TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err); if (err == 0) { err = pthread_attr_getstack (&attr, &stack_addr, &stack_size); if (err == 0) stack_addr = (char*) stack_addr + stack_size; TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n", (long) stack_size, stack_addr, err); err = pthread_attr_destroy (&attr); TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err); } *(void**) ptr = stack_addr; } else *(void**) ptr = stack; // cloned thread } void __collector_ext_unwind_init (int record) { int sz = UIDTableSize * sizeof (*UIDTable); UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1); if (UIDTable == NULL) { __collector_terminate_expt (); return; } CALL_UTIL (memset)((void*) UIDTable, 0, sz); char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH"); if (str != NULL && *str != 0) { char *endptr; int n = CALL_UTIL (strtol)(str, &endptr, 0); if (endptr != str && n >= 0) { if (n < 5) n = 5; if (n > MAX_STACKDEPTH) n = MAX_STACKDEPTH; max_java_nframes = n; } } str = CALL_UTIL 
(getenv)("GPROFNG_MAX_CALL_STACK_DEPTH"); if (str != NULL && *str != 0) { char *endptr = str; int n = CALL_UTIL (strtol)(str, &endptr, 0); if (endptr != str && n >= 0) { if (n < 5) n = 5; if (n > MAX_STACKDEPTH) n = MAX_STACKDEPTH; max_native_nframes = n; } } TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n", max_native_nframes, max_java_nframes); omp_no_walk = 1; if (__collector_VM_ReadByteInstruction == NULL) __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction"); #if ARCH(SPARC) #if WSIZE(64) misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler"); misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end"); if (misalign_hdlx == 0) misalign_hdlx = misalign_hdl + 292; barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_"); barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_"); if (barrier_hdlx == 0) barrier_hdl = 0; #else barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_"); barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_"); if (barrier_hdlx == 0) barrier_hdl = 0; #endif /* WSIZE() */ #elif ARCH(Intel) sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP); AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1); sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK); AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1); if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL)) { sz = OmpValTableSize * sizeof (*OmpCurCtxs); OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1); sz = OmpValTableSize * sizeof (*OmpCtxs); OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1); sz = OmpValTableSize * sizeof (*OmpVals); OmpVals = (uint32_t*) __collector_allocCSize 
(__collector_heap, sz, 1); sz = OmpValTableSize * sizeof (*OmpRAs); OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1); if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL) { TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n"); __collector_terminate_expt (); return; } } #endif /* ARCH() */ if (record) { dhndl = __collector_create_handle (SP_FRINFO_FILE); __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE); } unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL); if (unwind_key == COLLECTOR_TSD_INVALID_KEY) { TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n"); __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created\n", SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT); return; } TprintfT (0, "unwind_init() completed normally\n"); return; } void __collector_ext_unwind_close () { __collector_delete_handle (dhndl); dhndl = NULL; } void* __collector_ext_return_address (unsigned level) { if (NULL == UIDTable) //unwind not initialized yet return NULL; unsigned size = (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller ucontext_t context; FILL_CONTEXT ((&context)); char* buf = (char*) alloca (size); if (buf == NULL) { TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size); return NULL; } int sz = stack_unwind (buf, size, NULL, NULL, &context, 0); if (sz < (level + 3) * sizeof (long)) { TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz); return NULL; } long *lbuf = (long*) buf; TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]); return (void *) (lbuf[level + 2]); } /* * Collector interface method getFrameInfo */ FrameInfo __collector_get_frame_info (hrtime_t ts, int mode, void *arg) { ucontext_t *context = NULL; void *bptr = NULL; CM_Array *array = NULL; int 
unwind_mode = 0; int do_walk = 1; if (mode & FRINFO_NO_WALK) do_walk = 0; int bmode = mode & 0xffff; int pseudo_context = 0; if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK) { bptr = arg; context = (ucontext_t*) alloca (sizeof (ucontext_t)); FILL_CONTEXT (context); unwind_mode |= bmode; } else if (bmode == FRINFO_FROM_UC) { context = (ucontext_t*) arg; if (context == NULL) return (FrameInfo) 0; if (GET_SP (context) == 0) pseudo_context = 1; } else if (bmode == FRINFO_FROM_ARRAY) { array = (CM_Array*) arg; if (array == NULL || array->length <= 0) return (FrameInfo) 0; } else return (FrameInfo) 0; int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes); if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context) max_frame_size += JAVA_FRAME_BYTES (max_java_nframes); Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size); frpckt->type = FRAME_PCKT; frpckt->hsize = sizeof (Frame_packet); char *d = (char*) (frpckt + 1); int size = max_frame_size; #define MIN(a,b) ((a)<(b)?(a):(b)) #if defined(GPROFNG_JAVA_PROFILING) /* get Java info */ if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context) { /* use only 2/3 of the buffer and leave the rest for the native stack */ int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes)); if (tmpsz > 0) { int sz = __collector_ext_jstack_unwind (d, tmpsz, context); d += sz; size -= sz; } } #endif /* get native stack */ if (context) { Stack_info *sinfo = (Stack_info*) d; int sz = sizeof (Stack_info); d += sz; size -= sz; #if ARCH(Intel) if (omp_no_walk == 0) do_walk = 1; #endif if (do_walk == 0) unwind_mode |= FRINFO_NO_WALK; int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes)); if (tmpsz > 0) { sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode); d += sz; size -= sz; } sinfo->kind = STACK_INFO; sinfo->hsize = (d - (char*) sinfo); } /* create a stack image from user data */ 
if (array && array->length > 0) { Stack_info *sinfo = (Stack_info*) d; int sz = sizeof (Stack_info); d += sz; size -= sz; sz = array->length; if (sz > size) sz = size; // YXXX should we mark this with truncation frame? __collector_memcpy (d, array->bytes, sz); d += sz; size -= sz; sinfo->kind = STACK_INFO; sinfo->hsize = (d - (char*) sinfo); } /* Compute the total size */ frpckt->tsize = d - (char*) frpckt; FrameInfo uid = compute_uid (frpckt); return uid; } FrameInfo compute_uid (Frame_packet *frp) { uint64_t idxs[LAST_INFO]; uint64_t uid = ROOT_UID; uint64_t idx = ROOT_IDX; Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize); char *end = (char*) frp + frp->tsize; for (;;) { if ((char*) cinfo >= end || cinfo->hsize == 0 || (char*) cinfo + cinfo->hsize > end) break; /* Start with a different value to avoid matching with uid */ uint64_t uidt = 1; uint64_t idxt = 1; long *ptr = (long*) ((char*) cinfo + cinfo->hsize); long *bnd = (long*) ((char*) cinfo + sizeof (Common_info)); TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize); while (ptr > bnd) { long val = *(--ptr); tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val); uidt = (uidt + val) * ROOT_UID; idxt = (idxt + val) * ROOT_IDX; uid = (uid + val) * ROOT_UID; idx = (idx + val) * ROOT_IDX; } if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO) { cinfo->uid = uidt; idxs[cinfo->kind] = idxt; } cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize); } tprintf (DBG_LT2, "\n"); /* Check if we have already recorded that uid. * The following fragment contains benign data races. * It's important, though, that all reads from UIDTable * happen before writes. 
*/ int found1 = 0; int idx1 = (int) ((idx >> 44) % UIDTableSize); if (UIDTable[idx1] == uid) found1 = 1; int found2 = 0; int idx2 = (int) ((idx >> 24) % UIDTableSize); if (UIDTable[idx2] == uid) found2 = 1; int found3 = 0; int idx3 = (int) ((idx >> 4) % UIDTableSize); if (UIDTable[idx3] == uid) found3 = 1; if (!found1) UIDTable[idx1] = uid; if (!found2) UIDTable[idx2] = uid; if (!found3) UIDTable[idx3] = uid; if (found1 || found2 || found3) return (FrameInfo) uid; frp->uid = uid; /* Compress info's */ cinfo = (Common_info*) ((char*) frp + frp->hsize); for (;;) { if ((char*) cinfo >= end || cinfo->hsize == 0 || (char*) cinfo + cinfo->hsize > end) break; if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO) { long *ptr = (long*) ((char*) cinfo + sizeof (Common_info)); long *bnd = (long*) ((char*) cinfo + cinfo->hsize); uint64_t uidt = cinfo->uid; uint64_t idxt = idxs[cinfo->kind]; int found = 0; int first = 1; while (ptr < bnd - 1) { int idx1 = (int) ((idxt >> 44) % UIDTableSize); if (UIDTable[idx1] == uidt) { found = 1; break; } else if (first) { first = 0; UIDTable[idx1] = uidt; } long val = *ptr++; uidt = uidt * ROOT_UID_INV - val; idxt = idxt * ROOT_IDX_INV - val; } if (found) { char *d = (char*) ptr; char *s = (char*) bnd; if (!first) { int i; for (i = 0; i> 8; } } int delta = s - d; while (s < end) *d++ = *s++; cinfo->kind |= COMPRESSED_INFO; cinfo->hsize -= delta; frp->tsize -= delta; end -= delta; } } cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize); } __collector_write_packet (dhndl, (CM_Packet*) frp); return (FrameInfo) uid; } FrameInfo __collector_getUID (CM_Array *arg, FrameInfo suid) { if (arg->length % sizeof (long) != 0 || (long) arg->bytes % sizeof (long) != 0) return (FrameInfo) - 1; if (arg->length == 0) return suid; uint64_t uid = suid ? suid : 1; uint64_t idx = suid ? 
suid : 1; long *ptr = (long*) ((char*) arg->bytes + arg->length); long *bnd = (long*) (arg->bytes); while (ptr > bnd) { long val = *(--ptr); uid = (uid + val) * ROOT_UID; idx = (idx + val) * ROOT_IDX; } /* Check if we have already recorded that uid. * The following fragment contains benign data races. * It's important, though, that all reads from UIDTable * happen before writes. */ int found1 = 0; int idx1 = (int) ((idx >> 44) % UIDTableSize); if (UIDTable[idx1] == uid) found1 = 1; int found2 = 0; int idx2 = (int) ((idx >> 24) % UIDTableSize); if (UIDTable[idx2] == uid) found2 = 1; int found3 = 0; int idx3 = (int) ((idx >> 4) % UIDTableSize); if (UIDTable[idx3] == uid) found3 = 1; if (!found1) UIDTable[idx1] = uid; if (!found2) UIDTable[idx2] = uid; if (!found3) UIDTable[idx3] = uid; if (found1 || found2 || found3) return (FrameInfo) uid; int sz = sizeof (Uid_packet) + arg->length; if (suid) sz += sizeof (suid); Uid_packet *uidp = alloca (sz); uidp->tsize = sz; uidp->type = UID_PCKT; uidp->flags = 0; uidp->uid = uid; /* Compress */ ptr = (long*) (arg->bytes); bnd = (long*) ((char*) arg->bytes + arg->length); long *dst = (long*) (uidp + 1); uint64_t uidt = uid; uint64_t idxt = idx; uint64_t luid = suid; /* link uid */ while (ptr < bnd) { long val = *ptr++; *dst++ = val; if ((bnd - ptr) > sizeof (uidt)) { uidt = uidt * ROOT_UID_INV - val; idxt = idxt * ROOT_IDX_INV - val; int idx1 = (int) ((idxt >> 44) % UIDTableSize); if (UIDTable[idx1] == uidt) { luid = uidt; break; } } } if (luid) { char *d = (char*) dst; for (int i = 0; i> 8; } uidp->flags |= COMPRESSED_INFO; uidp->tsize = d - (char*) uidp; } __collector_write_packet (dhndl, (CM_Packet*) uidp); return (FrameInfo) uid; } int __collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg) { if (arg == (void*) __collector_omp_stack_trace) seen_omp = 1; int do_walk = 1; if (arg == NULL || arg == (void*) __collector_omp_stack_trace) { do_walk = (arg == (void*) __collector_omp_stack_trace && 
omp_no_walk) ? 0 : 1; ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t)); FILL_CONTEXT (context); arg = context; } int unwind_mode = 0; if (do_walk == 0) unwind_mode |= FRINFO_NO_WALK; return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode); } #if ARCH(SPARC) /* * These are important data structures taken from the header files reg.h and * ucontext.h. They are used for the stack trace algorithm explained below. * * typedef struct ucontext { * u_long uc_flags; * struct ucontext *uc_link; * usigset_t uc_sigmask; * stack_t uc_stack; * mcontext_t uc_mcontext; * long uc_filler[23]; * } ucontext_t; * * #define SPARC_MAXREGWINDOW 31 * * struct rwindow { * greg_t rw_local[8]; * greg_t rw_in[8]; * }; * * #define rw_fp rw_in[6] * #define rw_rtn rw_in[7] * * struct gwindows { * int wbcnt; * int *spbuf[SPARC_MAXREGWINDOW]; * struct rwindow wbuf[SPARC_MAXREGWINDOW]; * }; * * typedef struct gwindows gwindows_t; * * typedef struct { * gregset_t gregs; * gwindows_t *gwins; * fpregset_t fpregs; * long filler[21]; * } mcontext_t; * * The stack would look like this when SIGPROF occurrs. * * ------------------------- <- high memory * | | * | | * ------------------------- * | | * ------------------------- <- fp' <-| * | | | * : : | * | | | * ------------------------- | * | fp |----------| * | | * ------------------------- <- sp' * | | | | * | gwins | <- saved stack pointers & | | * | | register windows | |- mcontext * ------------------------- | | * | gregs | <- saved registers | | * ------------------------- | * | | |- ucontext * ------------------------- <- ucp (ucontext pointer) | * | | | * | | |- siginfo * ------------------------- <- sip (siginfo pointer) | * | | * ------------------------- <- sp * * Then the signal handler is called with: * handler( signo, sip, uip ); * When gwins is null, all the stack frames are saved in the user stack. * In that case we can find sp' from gregs and walk the stack for a backtrace. 
 * However, if gwins is not null we will have a more complicated case.
 * Wbcnt (in gwins) tells you how many saved register windows are valid.
 * This is important because the kernel does not allocate the entire array.
 * And the topmost frame is saved in the lowest index element. The next
 * paragraph explains the possible causes.
 *
 * There are two routines in the kernel to flush out user register windows.
 *     flush_user_windows and flush_user_windows_to_stack
 * The first routine will not cause a page fault. Therefore if the user
 * stack is not in memory, the register windows will be saved to the pcb.
 * This can happen when the kernel is trying to deliver a signal and
 * the user stack got swapped out. The kernel will then build a new context
 * for the signal handler and the saved register windows will
 * be copied to the ucontext as shown above. On the other hand,
 * flush_user_windows_to_stack can cause a page fault, and if it failed
 * then there is something wrong (stack overflow, misalign).
 * The first saved register window does not necessarily correspond to the
 * first stack frame. So the current stack pointer must be compared with
 * the stack pointers in spbuf to find a match.
 *
 * We will also follow the uc_link field in ucontext to trace nested
 * signal stack frames as well.
 *
 */

/* Dealing with trap handlers.
 * When a user defined trap handler is invoked the return address
 * (or actually the address of an instruction that raised the trap)
 * is passed to the trap handler in %l6, whereas saved %o7 contains
 * garbage. First, we need to find out if a particular pc belongs
 * to the trap handler, and if so, take the %l6 value from the stack rather
 * than %o7 from either the stack or the register.
 * There are three possible situations represented
 * by the following stacks:
 *
 *   MARKER              MARKER                     MARKER
 *   trap handler pc     __func pc before 'save'    __func pc after 'save'
 *   %l6                 %o7 from reg               %o7 (garbage)
 *   ...                 %l6                        trap handler pc
 *                       ...                        %l6
 *                                                  ...
 * where __func is a function called from the trap handler.
 *
 * Currently this is implemented to only deal with __misalign_trap_handler
 * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
 * macro shows it. A general solution is postponed.
 */

/* Special handling of unwind through the parallel loop barrier code:
 *
 * The library defines two symbols, __mt_EndOfTask_Barrier_ and
 * __mt_EndOfTask_Barrier_Dummy_ representing the first word of
 * the barrier synchronization code, and the first word following
 * it. Whenever the leaf PC is between these two symbols,
 * the unwind code is special-cased as follows:
 * The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
 * function, so its return address is in a register, not saved on
 * the stack.
 *
 *   MARKER
 *   __mt_EndOfTask_Barrier_ PC -- the leaf PC
 *   loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
 *       this address is taken from the %O0 register
 *   {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
 *   ...
 *
 * With this trick, the analyzer will show the time in the barrier
 * attributed to the loop at the end of which the barrier synchronization
 * is taking place. That loop body routine will be shown as called
 * from the function from which it was extracted, which will be shown
 * as called from the real caller, either the slave or master library routine.
 */

/*
 * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
 *
 * Note that 0x82 is ASI_PNF.
See
 * http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
 * ASI address space identifier; PNF primary no fault
 */

/* load an int from an address */

/* if the address is illegal, return a 0 */
/* Performs a single "lda" through address space 0x82 and returns the
   loaded word.  A result of 0 is ambiguous: it can also be the value
   stored at a legal address.  */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  __asm__ __volatile__(
                       "lda [%1] 0x82, %0\n\t"
                       : "=r" (val)
                       : "r" (addr)
                       );

  return val;
}

/* check if an address is invalid
 *
 * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
 * It returns a 0, but so could a load of a legal address.
 * So, we time the load. A "fast" load must be a successful load.
 * A "slow" load is probably a fault.
 * Since it could also be a cache/TLB miss or other abnormality,
 * it's safest to retry a slow load.
 * The cost of trying a valid address should be some nanosecs.
 * The cost of trying an invalid address up to 10 times could be some microsecs.
 */
#if 0
/* Disabled: reports 0 as soon as one of up to 10 timed loads completes in
   fewer than 100 ticks, and 1 otherwise.  */
static int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    for (i=0; i<10; i++) {
      __asm__ __volatile__(
                           "rd %%tick, %0\n\t"
                           "lduba [%2] 0x82, %%g0\n\t"
                           "rd %%tick, %1\n\t"
                           : "=r" (t1), "=r" (t2)
                           : "r" (addr) );
      if ( (t2 - t1) < 100 )
        return 0;
    }
    return 1;
}
#endif

/*
 * The standard SPARC procedure-calling convention is that the
 * calling PC (for determining the return address when the procedure
 * is finished) is placed in register %o7. A called procedure
 * typically executes a "save" instruction that shifts the register
 * window, and %o7 becomes %i7.
 *
 * Optimized leaf procedures do not shift the register window.
 * They assume the return address will remain %o7. So when
 * we process a leaf PC, we walk instructions to see if there
 * is a call, restore, or other instruction that would indicate
 * we can IGNORE %o7 because this is NOT a leaf procedure.
 *
 * If a limited instruction walk uncovers no such hint, we save
 * not only the PC but the %o7 value as well... just to be safe.
 * Later, in DBE post-processing of the call stacks, we decide
 * whether any recorded %o7 value should be used as a caller
 * frame or should be discarded.
 */

/* Bit-pattern tests on a 32-bit instruction word, used by process_leaf.  */
#define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
#define IS_SAVE(x) (((x) & 0xc1f80000) == 0x81e00000)
#define IS_MOVO7R(x) (((x) & 0xc1f8201f) == 0x8160000f)
#define IS_MOVRO7(x) (((x) & 0xfff82000) == 0x9f600000)
#define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
#define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
#define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
#define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
#define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
#define IS_RET(x) ((x) == 0x81c7e008)
#define IS_RETL(x) ((x) == 0x81c3e008)
#define IS_RETURN(x) (((x) & 0xc1f80000) == 0x81c80000)
#define IS_BRANCH(x) ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
#define IS_CALL(x) (((x) & 0xc0000000) == 0x40000000)
#define IS_LDO7(x) (((x) & 0xfff80000) == 0xde000000)

/* Cached result of sysconf(_SC_PAGESIZE); 0 until first needed.  */
static long pagesize = 0;

/* Record the leaf frame of CONTEXT in LBUF starting at index IND (LSIZE is
   the capacity in longs) and return the new index.  Up to 20 instructions
   from the pc onwards are inspected, plus a bounded number before it when
   a branch is met, to decide whether %o7 may still hold the caller's
   address.  If so, SP_LEAF_CHECK_MARKER, pc and o7 are stored; otherwise
   only pc is stored.  */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  if (IN_BARRIER (pc))
    {
      if (ind < lsize)
        lbuf[ind++] = pc;
      if (ind < lsize)
        lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  if (IN_TRAP_HANDLER (pc))
    {
      if (ind < lsize)
        lbuf[ind++] = pc;
      return ind;
    }
#endif
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
        break;
      else if (IS_SAVE (instr))
        {
          if (ind < lsize)
            lbuf[ind++] = pc;
          if (o7 && ind < lsize)
            lbuf[ind++] = o7;
          return ind;
        }
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
        break;
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
        {
          /* %o7 is about to be loaded from another register: use that
             register's current value instead.  */
          int rs2 = (instr & 0x1f) + REG_G1 - 1;
          o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
          break;
        }
      else if (IS_ORRG0O7 (instr))
        {
          int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
          o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
          break;
        }
      else if (IS_RESTORE (instr))
        {
          o7 = 0;
          break;
        }
      else if (IS_RETURN (instr))
        {
          o7 = 0;
          break;
        }
      else if (IS_RET (instr))
        {
          o7 = 0;
          break;
        }
      else if (IS_RETL (instr))
        {
          /* process delay slot */
          instr = *instrp++;
          if (IS_RESTORE (instr))
            o7 = 0;
          break;
        }
      else if (IS_BRANCH (instr))
        {
          /* Scan backwards from just before pc.  The window shrinks by the
             number of instructions already consumed going forwards.  */
          unsigned *backbegin = ((unsigned *) pc - 1);
          unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
          while (backbegin > backend)
            {
              // 21920143 stack unwind: SPARC process_leaf backtracks too far
              /*
               * We've already dereferenced backbegin+1.
               * So if backbegin is on the same page, we're fine.
               * If we've gone to a different page, possibly things are not fine.
               * We don't really know how to test that.
               * Let's just assume the worst: that dereferencing backbegin would segv.
               * We won't know if we're in a leaf function or not.
               */
              if (pagesize == 0)
                pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
              if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
                break;
              unsigned backinstr = *backbegin--;
              if (IS_LDO7 (backinstr))
                {
                  o7 = 0;
                  break;
                }
              else if (IS_ILLTRAP (backinstr))
                break;
              else if (IS_RETURN (backinstr))
                break;
              else if (IS_RET (backinstr))
                break;
              else if (IS_RETL (backinstr))
                break;
              else if (IS_CALL (backinstr))
                break;
              else if (IS_SAVE (backinstr))
                {
                  o7 = 0;
                  break;
                }
            }
          break;
        }
      else if (IS_CALL (instr))
        o7 = 0;
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x\n",
      // SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  if (o7)
    {
      if (ind < lsize)
        lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
        lbuf[ind++] = pc;
      if (ind < lsize)
        lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}

#if WSIZE(64)
// detect signal handler
/* If TPC (after skipping up to 7 nops) points at the instruction
   0x82102065, fetch the interrupted pc values and frame pointer from fixed
   offsets above *PFP, update *PFP, and append the addresses to LBUF.
   Returns the new index, or IND unchanged when TPC is not a plausible
   instruction address or the instruction does not match.  */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
                   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind; // the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
        break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  // 0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *     struct sparc_stackf ss;
       *     siginfo_t info;
       *     struct pt_regs regs;
       *     ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *     unsigned long u_regs[16];
       *     unsigned long tstate;
       *     unsigned long tpc;
       *     unsigned long tnpc;
       *     ....};
       * pc and fp register has offset of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
*/ greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136)); greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120)); (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112)); if (pc && pc1) { if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2) { lbuf[0] = pc1; if (ind == 0) ind++; } if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr) { if (ind < lsize) lbuf[ind++] = (unsigned long) tpc; if (ind < lsize) lbuf[ind++] = pc; if (ind < lsize) lbuf[ind++] = pc1; } } DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp)); } return ind; } #endif /* * int stack_unwind( char *buf, int size, ucontext_t *context ) * This routine looks into the mcontext and * trace stack frames to record return addresses. */ int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode) { /* * trace the stack frames from user stack. * We are assuming that the frame pointer and return address * are null when we are at the top level. */ long *lbuf = (long*) buf; int lsize = size / sizeof (long); struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */ greg_t pc; /* program counter */ int extra_frame = 0; if ((mode & 0xffff) == FRINFO_FROM_STACK) extra_frame = 1; int ind = 0; if (bptr == NULL) ind = process_leaf (lbuf, ind, lsize, context); int extra_frame = 0; if ((mode & 0xffff) == FRINFO_FROM_STACK) extra_frame = 1; int ind = 0; if (bptr == NULL) ind = process_leaf (lbuf, ind, lsize, context); while (fp) { if (ind >= lsize) break; fp = (struct frame *) ((char *) fp + STACK_BIAS); if (eptr && fp >= (struct frame *) eptr) { ind = ind >= 2 ? 
ind - 2 : 0; break; } #if WSIZE(64) // detect signal handler unsigned char * tpc = ((unsigned char*) (fp->fr_savpc)); struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS); int old_ind = ind; ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame); if (ind != old_ind) { pc = (greg_t) tpc; fp = tfp; } else #endif { #if WSIZE(64) if (IN_TRAP_HANDLER (lbuf[ind - 1])) pc = fp->fr_local[6]; else pc = fp->fr_savpc; #else pc = fp->fr_savpc; #endif fp = fp->fr_savfp; if (pc) { if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2) { lbuf[0] = pc; if (ind == 0) ind++; } if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr) lbuf[ind++] = pc; } } /* 4616238: _door_return may have a frame that has non-zero * saved stack pointer and zero pc */ if (pc == (greg_t) NULL) break; } if (ind >= lsize) { /* truncated stack handling */ ind = lsize - 1; lbuf[ind++] = SP_TRUNC_STACK_MARKER; } return ind * sizeof (long); } #elif ARCH(Intel) /* get __NR_ constants */ #include /* * From uts/intel/ia32/os/sendsig.c: * * An amd64 signal frame looks like this on the stack: * * old %rsp: * <128 bytes of untouched stack space> * * * * * new %rsp: * * The signal number and siginfo_t pointer are only pushed onto the stack in * order to allow stack backtraces. The actual signal handling code expects the * arguments in registers. 
* * An i386 SVR4/ABI signal frame looks like this on the stack: * * old %esp: * * * * * * new %esp: */ #if WSIZE(32) #define OPC_REG(x) ((x)&0x7) #define MRM_REGD(x) (((x)>>3)&0x7) #define MRM_REGS(x) ((x)&0x7) #define RED_ZONE 0 #elif WSIZE(64) #define OPC_REG(x) (B|((x)&0x7)) #define MRM_REGD(x) (R|(((x)>>3)&0x7)) #define MRM_REGS(x) (B|((x)&0x7)) #define RED_ZONE 16 #endif #define MRM_EXT(x) (((x)>>3)&0x7) #define MRM_MOD(x) ((x)&0xc0) #define RAX 0 #define RDX 2 #define RSP 4 #define RBP 5 struct AdvWalkContext { unsigned char *pc; unsigned long *sp; unsigned long *sp_safe; unsigned long *fp; unsigned long *fp_sav; unsigned long *fp_loc; unsigned long rax; unsigned long rdx; unsigned long ra_sav; unsigned long *ra_loc; unsigned long regs[16]; int tidx; /* targets table index */ uint32_t cval; /* cache value */ }; static unsigned long getRegVal (struct AdvWalkContext *cur, int r, int *undefRez) { if (cur->regs[r] == 0) { if (r == RBP) { tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx cur->pc=0x%lx\n", (unsigned long) cur->fp, (unsigned long) cur->pc); return (unsigned long) cur->fp; } *undefRez = 1; } tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx cur->pc=0x%lx\n", r, (unsigned long) cur->regs[r], (unsigned long) cur->pc); return cur->regs[r]; } static unsigned char * check_modrm (unsigned char *pc) { unsigned char modrm = *pc++; unsigned char mod = MRM_MOD (modrm); if (mod == 0xc0) return pc; unsigned char regs = modrm & 0x07; if (regs == RSP) { if (mod == 0x40) return pc + 2; // SIB + disp8 if (mod == 0x80) return pc + 5; // SIB + disp32 return pc + 1; // SIB } if (mod == 0x0) { if (regs == RBP) pc += 4; // disp32 } else if (mod == 0x40) pc += 1; /* byte */ else if (mod == 0x80) pc += 4; /* word */ return pc; } static int read_int (unsigned char *pc, int w) { if (w == 1) return *((char *) pc); if (w == 2) return *(short*) pc; return *(int*) pc; } /* Return codes */ enum { RA_FAILURE = 0, RA_SUCCESS, RA_END_OF_STACK, RA_SIGRETURN, RA_RT_SIGRETURN 
}; /* Cache value encodings */ static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */ static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */ #define MAXCTX 16 #define MAXTRGTS 64 #define MAXJMPREG 2 #define MAXJMPREGCTX 3 #define DELETE_CURCTX() __collector_memcpy (cur, buf + (--nctx), sizeof (*cur)) /** * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK * @param wctx * @return */ static int cache_get (struct WalkContext *wctx) { unsigned long addr; if (AddrTable_RA_FROMFP != NULL) { uint64_t idx = wctx->pc % ValTableSize; addr = AddrTable_RA_FROMFP[ idx ]; if (addr == wctx->pc) { // Found in AddrTable_RA_FROMFP unsigned long *sp = NULL; unsigned long fp = wctx->fp; /* validate fp before use */ if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp)) return RA_FAILURE; sp = (unsigned long *) fp; fp = *sp++; unsigned long ra = *sp++; unsigned long tbgn = wctx->tbgn; unsigned long tend = wctx->tend; if (ra < tbgn || ra >= tend) if (!__collector_check_segment (ra, &tbgn, &tend, 0)) return RA_FAILURE; unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend); if (npc == 0) return RA_FAILURE; DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc); wctx->pc = npc; wctx->sp = (unsigned long) sp; wctx->fp = fp; wctx->tbgn = tbgn; wctx->tend = tend; return RA_SUCCESS; } } if (NULL == AddrTable_RA_EOSTCK) return RA_FAILURE; uint64_t idx = wctx->pc % ValTableSize; addr = AddrTable_RA_EOSTCK[ idx ]; if (addr != wctx->pc) return RA_FAILURE; DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__); return RA_END_OF_STACK; } /** * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val * @param wctx */ static void cache_put (struct WalkContext *wctx, const uint32_t val) { if (RA_FROMFP == val) { // save pc in RA_FROMFP cache if (NULL != AddrTable_RA_FROMFP) { uint64_t idx = wctx->pc % ValTableSize; AddrTable_RA_FROMFP[ idx ] = wctx->pc; if (NULL != AddrTable_RA_EOSTCK) if 
(AddrTable_RA_EOSTCK[ idx ] == wctx->pc) // invalidate pc in RA_EOSTCK cache AddrTable_RA_EOSTCK[ idx ] = 0; } return; } if (RA_EOSTCK == val) { // save pc in RA_EOSTCK cache if (NULL != AddrTable_RA_EOSTCK) { uint64_t idx = wctx->pc % ValTableSize; AddrTable_RA_EOSTCK[ idx ] = wctx->pc; if (NULL != AddrTable_RA_FROMFP) { if (AddrTable_RA_FROMFP[ idx ] == wctx->pc) // invalidate pc in RA_FROMFP cache AddrTable_RA_FROMFP[ idx ] = 0; } } return; } } static int process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on) { if ((unsigned long) cur->sp >= wctx->sbase || (unsigned long) cur->sp < wctx->sp) { DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n", cur->sp, wctx->sp, wctx->sbase); return RA_FAILURE; } unsigned long ra; if (cur->sp == cur->ra_loc) { ra = cur->ra_sav; cur->sp++; } else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) ra = *cur->sp++; else { DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe); return RA_FAILURE; } if (ra == 0) { if (cache_on) cache_put (wctx, RA_EOSTCK); wctx->pc = ra; wctx->sp = (unsigned long) cur->sp; wctx->fp = (unsigned long) cur->fp; DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__); return RA_END_OF_STACK; } unsigned long tbgn = wctx->tbgn; unsigned long tend = wctx->tend; if (ra < tbgn || ra >= tend) { if (!__collector_check_segment (ra, &tbgn, &tend, 0)) { DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n", ra, wctx->tbgn, wctx->tend); return RA_FAILURE; } } if (cur->cval == RA_FROMFP) { if (wctx->fp == (unsigned long) (cur->sp - 2)) { if (cache_on) cache_put (wctx, RA_FROMFP); } else cur->cval = 0; } unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend); if (npc == 0) { if (cur->cval == RA_FROMFP) { /* We have another evidence that we can trust this RA */ DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc); wctx->pc = ra; } else { DprintfT 
(SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n"); return RA_FAILURE; } } else wctx->pc = npc; wctx->sp = (unsigned long) cur->sp; wctx->fp = (unsigned long) cur->fp; wctx->tbgn = tbgn; wctx->tend = tend; return RA_SUCCESS; } static int process_return (struct WalkContext *wctx, struct AdvWalkContext *cur) { return process_return_real (wctx, cur, 1); } static void omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save, struct WalkContext *wctx, uint32_t val) { if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)) { size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs); OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1); sz = OmpValTableSize * sizeof (*OmpCtxs); OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1); sz = OmpValTableSize * sizeof (*OmpVals); OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1); sz = OmpValTableSize * sizeof (*OmpRAs); OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1); } if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL) return; #define USE_18434988_OMP_CACHE_WORKAROUND #ifndef USE_18434988_OMP_CACHE_WORKAROUND uint64_t idx = wctx_pc_save->pc * ROOT_IDX; OmpVals[ idx % OmpValTableSize ] = val; idx = (idx + val) * ROOT_IDX; __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext)); idx = (idx + val) * ROOT_IDX; __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext)); #endif unsigned long *sp = NULL; unsigned long fp = wctx_pc_save->fp; int from_fp = 0; if (val == RA_END_OF_STACK) { sp = (unsigned long *) (wctx->sp); sp--; TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp); } else { if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp)) { sp = (unsigned long *) (wctx->sp); sp--; TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, 
sp=%p\n", sp); } else { TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp); sp = (unsigned long *) fp; from_fp = 1; } } if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase)) return; unsigned long ra = *sp++; if (from_fp) { unsigned long tbgn = wctx_pc_save->tbgn; unsigned long tend = wctx_pc_save->tend; if (ra < tbgn || ra >= tend) { sp = (unsigned long *) (wctx->sp); sp--; ra = *sp++; } } #ifdef USE_18434988_OMP_CACHE_WORKAROUND uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX; uint64_t idx2 = (idx1 + val) * ROOT_IDX; uint64_t idx3 = (idx2 + val) * ROOT_IDX; uint64_t idx4 = (idx3 + val) * ROOT_IDX; OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock OmpVals[ idx1 % OmpValTableSize ] = val; __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext)); __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext)); OmpRAs [ idx4 % OmpValTableSize ] = ra; #else idx = (idx + val) * ROOT_IDX; OmpRAs[ idx % OmpValTableSize ] = ra; #endif TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc); } /* * See bug 17166877 - malloc_internal unwind failure. * Sometimes there are several calls right after ret, like: * leave * ret * call xxx * call xxxx * call xxxxx * If they are also jump targets, we should better not * create new jump context for those, since they may * end up into some other function. */ static int is_after_ret (unsigned char * npc) { if (*npc != 0xe8) return 0; unsigned char * onpc = npc; int ncall = 1; int maxsteps = 10; int mincalls = 3; int steps = 0; while (*(npc - 5) == 0xe8 && steps < maxsteps) { npc -= 5; ncall++; steps++; } if (*(npc - 1) != 0xc3 || *(npc - 2) != 0xc9) return 0; steps = 0; while (*(onpc + 5) == 0xe8 && steps < maxsteps) { onpc += 5; ncall++; steps++; } if (ncall < mincalls) return 0; return 1; } static int find_i386_ret_addr (struct WalkContext *wctx, int do_walk) { if (wctx->sp == 0) // Some artificial contexts may have %sp set to 0. 
See SETFUNCTIONCONTEXT() return RA_FAILURE; /* Check cached values */ int retc = cache_get (wctx); if (retc != RA_FAILURE) return retc; /* An attempt to perform code analysis for call stack tracing */ unsigned char opcode; unsigned char extop; unsigned char extop2; unsigned char modrm; int imm8; /* immediate operand, byte */ int immv; /* immediate operand, word(2) or doubleword(4) */ int reg; /* register code */ /* Buffer for branch targets (analysis stoppers) */ unsigned char *targets[MAXTRGTS]; int ntrg = 0; /* number of entries in the table */ targets[ntrg++] = (unsigned char*) wctx->pc; targets[ntrg++] = (unsigned char*) - 1; struct AdvWalkContext buf[MAXCTX]; struct AdvWalkContext *cur = buf; CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur)); cur->pc = (unsigned char*) wctx->pc; cur->sp = (unsigned long*) wctx->sp; cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */ cur->fp = (unsigned long*) wctx->fp; cur->tidx = 1; DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc); int nctx = 1; /* number of contexts being processed */ int cnt = 8192; /* number of instructions to analyse */ /* * The basic idea of our x86 stack unwind is that we don't know * if we can trust the frame-pointer register. So we walk * instructions to find a return instruction, at which point * we know the return address is on the top of the stack, etc. * * A severe challenge to walking x86 instructions is when we * encounter "jmp *(reg)" instructions, where we are expected * to jump to the (unknown-to-us) contents of a register. * * The "jmp_reg" code here attempts to keep track of the * context for such a jump, deferring any handling of such * a difficult case. We continue with other contexts, hoping * that some other walk will take us to a return instruction. * * If no other walk helps, we return to "jmp_reg" contexts. 
* While we don't know the jump target, it is possible that the * bytes immediately following the jmp_reg instruction represent * one possible target, as might be the case when a "switch" * statement is compiled. * * Unfortunately, the bytes following a "jmp_reg" instruction might * instead be a jump target from somewhere else -- execution might * never "fall through" from the preceding "jmp_reg". Those bytes * might not even be instructions at all. There are many uses of * jmp_reg instructions beyond just compiling switch statements. * * So walking the bytes after a "jmp_reg" instruction can lead * to bugs and undefined behavior, including SEGV and core dump. * * We currently do not really understand the "jmp_reg" code below. */ int jmp_reg_switch_mode = 0; int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case int total_num_jmp_reg = 0; // number of total jmp *reg met struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases int cur_jmp_reg_switch = 0; // current switch table int num_jmp_reg_switch = 0; // number of switch table int jmp_reg_switch_case = 0; // case number in current switch table unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets int max_jmp_reg_switch_case = 2; #if WSIZE(32) int max_switch_pc_offset = 512; #else // WSIZE(64) int max_switch_pc_offset = 1024; #endif int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions int 
save_ctx = 0; // flag to save walk context in the cache to speed up unwind struct WalkContext wctx_pc_save; if (do_walk == 0) // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext)); startWalk: if (do_walk == 0) { // try to resolve RA from stack frame pointer if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL) { do_walk = 1; goto startWalk; } // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext)) uint64_t idx = wctx->pc * ROOT_IDX; uint32_t val = OmpVals[idx % OmpValTableSize]; idx = (idx + val) * ROOT_IDX; #ifdef USE_18434988_OMP_CACHE_WORKAROUND // Check ra: if it is 0 - then cache is invalid uint64_t idx4; idx4 = (idx + val) * ROOT_IDX; idx4 = (idx4 + val) * ROOT_IDX; if (0 == OmpRAs[ idx4 % OmpValTableSize ]) // Invalid cache goto checkFP; #endif struct WalkContext saved_ctx; __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); if (wctx->pc == saved_ctx.pc && wctx->sp == saved_ctx.sp && wctx->fp == saved_ctx.fp && wctx->tbgn == saved_ctx.tbgn && wctx->tend == saved_ctx.tend) { // key match, RA may be valid idx = (idx + val) * ROOT_IDX; unsigned long *sp = NULL; unsigned long fp = wctx->fp; int from_fp = 0; if (val == RA_END_OF_STACK) { DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc); __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); return val; } else { if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp)) { TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc); sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp); sp--; if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase) { goto checkFP; } unsigned long ra = *sp; uint64_t idx2 = (idx + val) * ROOT_IDX; if (OmpRAs[ idx2 % OmpValTableSize ] == ra) 
{ __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val); return val; } TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val); goto checkFP; } sp = (unsigned long *) fp; from_fp = 1; } uint64_t idx2 = (idx + val) * ROOT_IDX; unsigned long ra = *sp++; if (from_fp) { unsigned long tbgn = wctx->tbgn; unsigned long tend = wctx->tend; if (ra < tbgn || ra >= tend) { sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp); sp--; //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) { // The check above was replaced with the check below, // because we do not know why "- 16" and "- sizeof(*sp)" was used. if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase) goto checkFP; else ra = *sp; } } if (OmpRAs[ idx2 % OmpValTableSize ] == ra) { TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc); __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext)); return val; } } goto checkFP; } else { CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *)); CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *)); } while (cnt--) { if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1)) { // no context available, try jmp switch mode int i = 0; if (num_jmp_reg == expected_num_jmp_reg) jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n", num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode); // the ideal asm of switch is // jmp reg // ...//case 1 // ret // ...//case 2 // ret // ...//etc if 
(jmp_reg_switch_mode == 0) { num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg jmp_reg_switch_mode = 1; // begin switch mode for (i = 0; i < num_jmp_reg_switch; i++) { if (jmp_reg_switch_ctx[i] == NULL) jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i])); if (jmp_reg_switch_ctx[i] != NULL) { // backup jmp_reg_ctx __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i])); cur_jmp_reg_switch = 0; // reset the current switch table jmp_reg_switch_case = 0; // reset the case number in current switch table } } if (jmp_reg_switch_backup_ctx == NULL) { // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx)); if (jmp_reg_switch_backup_ctx != NULL) __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur)); DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n"); } } if (jmp_reg_switch_mode == 1) { // in the process of trying switch cases if (cur_jmp_reg_switch == num_jmp_reg_switch) { DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n"); if (jmp_reg_switch_backup_ctx != NULL) __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur)); int rc = process_return_real (wctx, cur, 0); if (rc == RA_SUCCESS) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP } unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc; if (jmp_reg_switch_case == 0) // first switch case npc = check_modrm (npc); // pc next to "jmp reg" instruction else if (jmp_reg_switch_pc != NULL) npc = jmp_reg_switch_pc; // // pc next to "ret" instruction of previous case else { DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, 
jmp_reg_switch_pc=%p\n", jmp_reg_switch_case, jmp_reg_switch_pc); break; //goto checkFP } jmp_reg_switch_base = npc; struct AdvWalkContext *new = buf + nctx; nctx += 1; __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new)); new->pc = npc; cur = new; /* advance the new context first */ jmp_reg_switch_pc = NULL; jmp_reg_switch_case++; if (jmp_reg_switch_case == max_jmp_reg_switch_case) { // done many cases, change to another switch table cur_jmp_reg_switch++; jmp_reg_switch_case = 0; } } num_jmp_reg = 0; } if (jmp_reg_switch_mode == 1) { // when processing switch cases, check pc each time unsigned long tbgn = wctx->tbgn; unsigned long tend = wctx->tend; if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend) { DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc)); break; } if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset) { DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n"); if (jmp_reg_switch_backup_ctx != NULL) __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur)); int rc = process_return_real (wctx, cur, 0); if (rc == RA_SUCCESS) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } break; // limit the walk offset after jmp reg instruction, got checkFP } } if (nctx == 0) break; // dump_targets (__LINE__, ntrg, targets); while (cur->pc > targets[cur->tidx]) cur->tidx += 1; if (cur->pc == targets[cur->tidx]) { /* Stop analysis. Delete context. 
*/ if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old) { if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n", __LINE__, cur->pc, jmp_reg_switch_pc, nctx); jmp_reg_switch_pc = cur->pc; // save cp before delete context, may be used as a start of switch case jmp_reg_switch_pc_old = jmp_reg_switch_pc; } DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__); DELETE_CURCTX (); if (cur >= buf + nctx) cur = buf; continue; } if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old) jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old } /* let's walk the next x86 instruction */ DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n", __LINE__, (long) (cur - buf), (unsigned long) cur->pc, (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2], (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5], (int) cur->pc[6], (unsigned long) cur->sp); int v = 4; /* Operand size */ int a = 4; /* Address size */ /* int W = 0; REX.W bit */ #if WSIZE(64) int R = 0; /* REX.R bit */ #endif int X = 0; /* REX.X bit */ int B = 0; /* REX.B bit */ /* Check prefixes */ int done = 0; while (!done) { opcode = *cur->pc++; switch (opcode) { case 0x66: /* opd size override */ v = 2; break; case 0x67: /*addr size override */ a = 2; break; #if WSIZE(64) case 0x40: /* REX */ case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f: B = (opcode & 0x1) ? 8 : 0; X = (opcode & 0x2) ? 8 : 0; R = (opcode & 0x4) ? 8 : 0; if (opcode & 0x8) /* 64 bit operand size */ v = 8; opcode = *cur->pc++; done = 1; break; #endif default: done = 1; break; } } int z = (v == 8) ? 
4 : v; switch (opcode) { case 0x0: /* add Eb,Gb */ case 0x01: /* add Ev,Gv */ case 0x02: /* add Gb,Eb */ case 0x03: /* add Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0x04: /* add %al,Ib */ cur->pc += 1; break; case 0x05: /* add %eax,Iz */ cur->pc += z; break; case 0x06: /* push es */ cur->sp -= 1; break; case 0x07: /* pop es */ cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0x08: /* or Eb,Gb */ case 0x09: /* or Ev,Gv */ case 0x0a: /* or Gb,Eb */ case 0x0b: /* or Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0x0c: /* or %al,Ib */ cur->pc += 1; break; case 0x0d: /* or %eax,Iz */ cur->pc += z; break; case 0x0e: /* push cs */ cur->sp -= 1; break; case 0x0f: /* two-byte opcodes */ extop = *cur->pc++; switch (extop) { /* RTM or HLE */ case 0x01: extop2 = *cur->pc; switch (extop2) { case 0xd5: /* xend */ case 0xd6: /* xtest */ cur->pc++; break; default: break; } break; case 0x03: cur->pc = check_modrm (cur->pc); break; case 0x0b: DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. 
opcode=0x%02x\n", __LINE__, (int) opcode); DELETE_CURCTX (); break; case 0x05: /* syscall */ case 0x34: /* sysenter */ if (cur->rax == __NR_exit) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n", __LINE__, (int) opcode); DELETE_CURCTX (); break; } else if (cur->rax == __NR_rt_sigreturn) { if (jmp_reg_switch_mode == 1) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n", __LINE__, (int) opcode); goto checkFP; } wctx->sp = (unsigned long) cur->sp; if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN); return RA_RT_SIGRETURN; } #if WSIZE(32) else if (cur->rax == __NR_sigreturn) { if (jmp_reg_switch_mode == 1) { DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n"); goto checkFP; } wctx->sp = (unsigned long) cur->sp; if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN); return RA_SIGRETURN; } #endif /* Check for Linus' trick in the vsyscall page */ while (*cur->pc == 0x90) /* nop */ cur->pc++; if (*cur->pc == 0xeb) /* jmp imm8 */ cur->pc += 2; break; case 0x0d: /* nop Ev */ cur->pc = check_modrm (cur->pc); break; case 0x10: /* xmm Vq,Wq */ case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: cur->pc = check_modrm (cur->pc); break; case 0x18: /* prefetch */ cur->pc = check_modrm (cur->pc); break; case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. 
) is parsing as repz nop edx */ cur->pc += 2; break; case 0x1f: /* nop Ev */ cur->pc = check_modrm (cur->pc); break; case 0x28: /* xmm Vq,Wq */ case 0x29: case 0x2a: case 0x2b: case 0x2c: case 0x2d: case 0x2e: case 0x2f: cur->pc = check_modrm (cur->pc); break; case 0x30: /* wrmsr */ case 0x31: /* rdtsc */ case 0x32: /* rdmsr */ case 0x33: /* rdpmc */ break; /* case 0x34: sysenter (see above) */ case 0x38: case 0x3a: extop2 = *cur->pc++; cur->pc = check_modrm (cur->pc); // 21275311 Unwind failure in native stack for java application running on jdk8 // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte. if (extop == 0x3a) cur->pc++; break; case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */ case 0x44: case 0x45: case 0x46: case 0x47: case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f: cur->pc = check_modrm (cur->pc); break; case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: case 0x58: case 0x59: case 0x5a: case 0x5b: case 0x5c: case 0x5d: case 0x5e: case 0x5f: case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: case 0x68: case 0x69: case 0x6a: case 0x6b: case 0x6c: case 0x6d: case 0x6e: case 0x6f: cur->pc = check_modrm (cur->pc); break; case 0x70: case 0x71: case 0x72: case 0x73: cur->pc = check_modrm (cur->pc) + 1; break; case 0x74: case 0x75: case 0x76: cur->pc = check_modrm (cur->pc); break; case 0x77: break; case 0x7c: case 0x7d: case 0x7e: case 0x7f: cur->pc = check_modrm (cur->pc); break; case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */ case 0x84: case 0x85: case 0x86: case 0x87: case 0x88: case 0x89: case 0x8a: case 0x8b: case 0x8c: case 0x8d: case 0x8e: case 0x8f: immv = read_int (cur->pc, z); cur->pc += z; if (nctx < (jmp_reg_switch_mode ? 
MAXJMPREGCTX : MAXCTX)) { int tidx = 0; unsigned char *npc = cur->pc + immv; if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n", __LINE__, (int) opcode); DELETE_CURCTX (); break; } if (is_after_ret (npc)) break; while (npc > targets[tidx]) tidx += 1; if (npc != targets[tidx]) { if (ntrg < MAXTRGTS) { for (int i = 0; i < nctx; i++) if (buf[i].tidx >= tidx) buf[i].tidx++; /* insert a new target */ for (int i = ntrg; i > tidx; i--) targets[i] = targets[i - 1]; ntrg += 1; targets[tidx++] = npc; } else DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n", __LINE__, ntrg); struct AdvWalkContext *new = buf + nctx; nctx += 1; __collector_memcpy (new, cur, sizeof (*new)); new->pc = npc; new->tidx = tidx; cur = new; /* advance the new context first */ continue; } } else DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n", __LINE__, ntrg); break; case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */ case 0x94: case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9a: case 0x9b: case 0x9c: case 0x9d: case 0x9e: case 0x9f: cur->pc = check_modrm (cur->pc); break; case 0xa0: /* push fs */ cur->sp -= 1; break; case 0xa1: /* pop fs */ cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0xa2: /* cpuid */ break; case 0xa3: /* bt Ev,Gv */ cur->pc = check_modrm (cur->pc); break; case 0xa4: /* shld Ev,Gv,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0xa5: /* shld Ev,Gv,%cl */ cur->pc = check_modrm (cur->pc); break; case 0xa8: /* push gs */ cur->sp -= 1; break; case 0xa9: /* pop gs */ cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0xaa: /* rsm */ break; case 0xab: /* bts Ev,Gv */ cur->pc = check_modrm (cur->pc); break; case 0xac: /* shrd Ev,Gv,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0xad: /* shrd Ev,Gv,%cl */ cur->pc = check_modrm 
(cur->pc); break; case 0xae: /* group15 */ cur->pc = check_modrm (cur->pc); break; case 0xaf: /* imul Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0xb1: /* cmpxchg Ev,Gv */ cur->pc = check_modrm (cur->pc); break; case 0xb3: case 0xb6: /* movzx Gv,Eb */ case 0xb7: /* movzx Gv,Ew */ cur->pc = check_modrm (cur->pc); break; case 0xba: /* group8 Ev,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0xbb: /* btc Ev,Gv */ case 0xbc: /* bsf Gv,Ev */ case 0xbd: /* bsr Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0xbe: /* movsx Gv,Eb */ case 0xbf: /* movsx Gv,Ew */ cur->pc = check_modrm (cur->pc); break; case 0xc0: /* xadd Eb,Gb */ case 0xc1: /* xadd Ev,Gv */ cur->pc = check_modrm (cur->pc); break; case 0xc2: /* cmpps V,W,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0xc3: /* movnti M,G */ cur->pc = check_modrm (cur->pc); break; case 0xc6: /* shufps V,W,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0xc7: /* RDRAND */ cur->pc = check_modrm (cur->pc); break; case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */ case 0xcc: case 0xcd: case 0xce: case 0xcf: break; case 0xd0: case 0xd1: case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd6: case 0xd7: case 0xd8: case 0xd9: case 0xda: case 0xdb: case 0xdc: case 0xdd: case 0xde: case 0xdf: case 0xe0: case 0xe1: case 0xe2: case 0xe3: case 0xe4: case 0xe5: case 0xe6: case 0xe7: case 0xe8: case 0xe9: case 0xea: case 0xeb: case 0xec: case 0xed: case 0xee: case 0xef: case 0xf0: case 0xf1: case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf6: case 0xf7: case 0xf8: case 0xf9: case 0xfa: case 0xfb: case 0xfc: case 0xfd: case 0xfe: case 0xff: cur->pc = check_modrm (cur->pc); break; default: if (jmp_reg_switch_mode == 1 && extop == 0x0b) DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n", __LINE__, (int) extop, jmp_reg_switch_mode); else { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n", 
__LINE__, (int) extop, jmp_reg_switch_mode); DELETE_CURCTX (); } break; } break; case 0x10: /* adc Eb,Gb */ case 0x11: /* adc Ev,Gv */ case 0x12: /* adc Gb,Eb */ case 0x13: /* adc Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0x14: /* adc %al,Ib */ cur->pc += 1; break; case 0x15: /* adc %eax,Iz */ cur->pc += z; break; case 0x16: /* push ss */ cur->sp -= 1; break; case 0x17: /* pop ss */ cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0x18: /* sbb Eb,Gb */ case 0x19: /* sbb Ev,Gv */ case 0x1a: /* sbb Gb,Eb */ case 0x1b: /* sbb Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0x1c: /* sbb %al,Ib */ cur->pc += 1; break; case 0x1d: /* sbb %eax,Iz */ cur->pc += z; break; case 0x1e: /* push ds */ cur->sp -= 1; break; case 0x1f: /* pop ds */ cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0x20: /* and Eb,Gb */ case 0x21: /* and Ev,Gv */ case 0x22: /* and Gb,Eb */ case 0x23: /* and Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0x24: /* and %al,Ib */ cur->pc += 1; break; case 0x25: /* and %eax,Iz */ cur->pc += z; break; case 0x26: /* seg=es prefix */ break; case 0x27: /* daa */ break; case 0x28: /* sub Eb,Gb */ case 0x29: /* sub Ev,Gv */ case 0x2a: /* sub Gb,Eb */ case 0x2b: /* sub Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0x2c: /* sub %al,Ib */ cur->pc += 1; break; case 0x2d: /* sub %eax,Iz */ cur->pc += z; break; case 0x2e: /* seg=cs prefix */ break; case 0x2f: /* das */ break; case 0x30: /* xor Eb,Gb */ case 0x31: /* xor Ev,Gv */ case 0x32: /* xor Gb,Eb */ case 0x33: /* xor Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 0x34: /* xor %al,Ib */ cur->pc += 1; break; case 0x35: /* xor %eax,Iz */ cur->pc += z; break; case 0x36: /* seg=ss prefix */ break; case 0x37: /* aaa */ break; case 0x38: /* cmp Eb,Gb */ case 0x39: /* cmp Ev,Gv */ case 0x3a: /* cmp Gb,Eb */ case 0x3b: /* cmp Gv,Ev */ cur->pc = check_modrm (cur->pc); break; case 
0x3c: /* cmp %al,Ib */ cur->pc += 1; break; case 0x3d: /* cmp %eax,Iz */ cur->pc += z; break; case 0x3e: /* seg=ds prefix */ break; case 0x3f: /* aas */ break; #if WSIZE(32) case 0x40: /* inc %eax */ case 0x41: /* inc %ecx */ case 0x42: /* inc %edx */ case 0x43: /* inc %ebx */ break; case 0x44: /* inc %esp */ /* Can't be a valid stack pointer - delete context */ DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__); DELETE_CURCTX (); break; case 0x45: /* inc %ebp */ case 0x46: /* inc %esi */ case 0x47: /* inc %edi */ case 0x48: /* dec %eax */ case 0x49: /* dec %ecx */ case 0x4a: /* dec %edx */ case 0x4b: /* dec %ebx */ break; case 0x4c: /* dec %esp */ /* Can't be a valid stack pointer - delete context */ DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__); DELETE_CURCTX (); break; case 0x4d: /* dec %ebp */ case 0x4e: /* dec %esi */ case 0x4f: /* dec %edi */ break; #endif case 0x50: /* push %eax */ case 0x51: /* push %ecx */ case 0x52: /* push %edx */ case 0x53: /* push %ebx */ case 0x54: /* push %esp */ case 0x55: /* push %ebp */ case 0x56: /* push %esi */ case 0x57: /* push %edi */ cur->sp -= 1; reg = OPC_REG (opcode); if (reg == RBP) { #if 0 /* Don't do this check yet. Affects tail calls. 
*/ /* avoid other function's prologue */ if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) || (cur->pc[0] == 0x8b && cur->pc[1] == 0xec)) { /* mov %esp,%ebp */ DELETE_CURCTX (); break; } #endif if (cur->fp_loc == NULL) { cur->fp_loc = cur->sp; cur->fp_sav = cur->fp; } } break; case 0x58: /* pop %eax */ case 0x59: /* pop %ecx */ case 0x5a: /* pop %edx */ case 0x5b: /* pop %ebx */ case 0x5c: /* pop %esp */ case 0x5d: /* pop %ebp */ case 0x5e: /* pop %esi */ case 0x5f: /* pop %edi */ reg = OPC_REG (opcode); cur->regs[reg] = 0; if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase)) cur->regs[reg] = *cur->sp; DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n", __LINE__, reg, (unsigned long) cur->regs[reg]); if (reg == RDX) { if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) cur->rdx = *cur->sp; } else if (reg == RBP) { if (cur->fp_loc == cur->sp) { cur->fp = cur->fp_sav; cur->fp_loc = NULL; } else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) cur->fp = (unsigned long*) (*cur->sp); } else if (reg == RSP) { /* f.e. 
JVM I2CAdapter */ if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) { unsigned long *nsp = (unsigned long*) (*cur->sp); if (nsp >= cur->sp && nsp <= cur->fp) { cur->sp = nsp; } else { DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n", __LINE__, opcode); goto checkFP; } } else { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n", __LINE__, opcode); goto checkFP; } break; } cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) { cur->sp_safe = cur->sp - RED_ZONE; } break; case 0x60: /* pusha(d) */ cur->sp -= 8; break; case 0x61: /* popa(d) */ cur->sp += 8; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0x62: /* group AVX, 4-bytes EVEX prefix */ { unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction int len = parse_x86_AVX_instruction (pc); if (len < 4) { DELETE_CURCTX (); } else { pc += len; cur->pc = pc; } } break; case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/ cur->pc = check_modrm (cur->pc); break; case 0x64: /* seg=fs prefix */ case 0x65: /* seg=gs prefix */ break; case 0x66: /* opd size override */ case 0x67: /* addr size override */ break; case 0x68: /* push Iz */ cur->sp = (unsigned long*) ((long) cur->sp - z); cur->pc += z; break; case 0x69: /* imul Gv,Ev,Iz */ cur->pc = check_modrm (cur->pc); cur->pc += z; break; case 0x6a: /* push Ib */ cur->sp = (unsigned long*) ((long) cur->sp - v); cur->pc += 1; break; case 0x6b: /* imul Gv,Ev,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0x6c: case 0x6d: case 0x6e: case 0x6f: cur->pc = check_modrm (cur->pc); break; case 0x70: /* jo Jb */ case 0x71: /* jno Jb */ case 0x72: /* jb Jb */ case 0x73: /* jnb Jb */ case 0x74: /* jz Jb */ case 0x75: /* jnz Jb */ case 0x76: /* jna Jb */ case 0x77: /* ja Jb */ case 0x78: /* js Jb */ case 0x79: /* jns Jb */ case 0x7a: /* jp Jb */ case 0x7b: /* jnp Jb */ case 0x7c: /* jl Jb */ case 0x7d: /* jge Jb */ case 0x7e: /* jle 
Jb */ case 0x7f: /* jg Jb */ imm8 = *(char*) cur->pc++; if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX)) { int tidx = 0; unsigned char *npc = cur->pc + imm8; if (is_after_ret (npc)) break; while (npc > targets[tidx]) tidx += 1; if (npc != targets[tidx]) { if (ntrg < MAXTRGTS) { for (int i = 0; i < nctx; i++) if (buf[i].tidx >= tidx) buf[i].tidx++; /* insert a new target */ for (int i = ntrg; i > tidx; i--) targets[i] = targets[i - 1]; ntrg += 1; targets[tidx++] = npc; } else DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg); struct AdvWalkContext *new = buf + nctx; nctx += 1; __collector_memcpy (new, cur, sizeof (*new)); new->pc = npc; new->tidx = tidx; cur = new; /* advance the new context first */ continue; } } else DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx); break; case 0x80: /* group1 Eb,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0x81: /* group1 Ev,Iz */ modrm = *cur->pc; if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP) { int immz = read_int (cur->pc + 1, z); extop = MRM_EXT (modrm); if (extop == 0) /* add imm32,%esp */ cur->sp = (unsigned long*) ((long) cur->sp + immz); else if (extop == 4) /* and imm32,%esp */ cur->sp = (unsigned long*) ((long) cur->sp & immz); else if (extop == 5) /* sub imm32,%esp */ cur->sp = (unsigned long*) ((long) cur->sp - immz); if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; } cur->pc = check_modrm (cur->pc); cur->pc += z; break; case 0x82: /* group1 Eb,Ib */ cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0x83: /* group1 Ev,Ib */ modrm = *cur->pc; if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP) { imm8 = (char) cur->pc[1]; /* sign extension */ extop = MRM_EXT (modrm); if (extop == 0) /* add imm8,%esp */ cur->sp = (unsigned long*) ((long) cur->sp + imm8); else if (extop == 4) /* and imm8,%esp */ cur->sp = (unsigned long*) ((long) cur->sp & imm8); else if (extop == 5) /* sub imm8,%esp */ cur->sp = 
(unsigned long*) ((long) cur->sp - imm8); if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; } cur->pc = check_modrm (cur->pc); cur->pc += 1; break; case 0x84: /* test Eb,Gb */ case 0x85: /* test Ev,Gv */ case 0x86: /* xchg Eb,Gb */ case 0x87: /* xchg Ev,Gv */ cur->pc = check_modrm (cur->pc); break; case 0x88: /* mov Eb,Gb */ cur->pc = check_modrm (cur->pc); break; case 0x89: /* mov Ev,Gv */ modrm = *cur->pc; if (MRM_MOD (modrm) == 0xc0) { if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP) /* movl %esp,%ebp */ cur->fp = cur->sp; else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) { /* mov %ebp,%esp */ cur->sp = cur->fp; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; if (wctx->fp == (unsigned long) cur->sp) cur->cval = RA_FROMFP; } } else if (MRM_MOD (modrm) == 0x80) { if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) { if (cur->pc[1] == 0x24) { /* mov %ebp,disp32(%esp) - JVM */ immv = read_int (cur->pc + 2, 4); cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv); cur->fp_sav = cur->fp; } } } else if (MRM_MOD (modrm) == 0x40) { if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX) { if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0) { /* movl %edx,0(%esp) */ cur->ra_loc = cur->sp; cur->ra_sav = cur->rdx; } } else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) { if (cur->pc[1] == 0x24) { /* mov %ebp,disp8(%esp) - JVM */ imm8 = ((char*) (cur->pc))[2]; cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8); cur->fp_sav = cur->fp; } } } else if (MRM_MOD (modrm) == 0x0) { if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) { if (cur->pc[1] == 0x24) { /* mov %ebp,(%esp) */ cur->fp_loc = cur->sp; cur->fp_sav = cur->fp; } } else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX) { if (cur->pc[1] == 0x24) { /* movl %edx,(%esp) */ cur->ra_loc = cur->sp; cur->ra_sav = cur->rdx; } } } cur->pc = check_modrm (cur->pc); break; case 0x8a: /* mov Gb,Eb */ cur->pc = check_modrm (cur->pc); break; 
case 0x8b: /* mov Gv,Ev */ modrm = *cur->pc; if (MRM_MOD (modrm) == 0xc0) { if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) /* mov %esp,%ebp */ cur->fp = cur->sp; else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP) { /* mov %ebp,%esp */ cur->sp = cur->fp; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; if (wctx->fp == (unsigned long) cur->sp) cur->cval = RA_FROMFP; } } else if (MRM_MOD (modrm) == 0x80) { if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) { if (cur->pc[1] == 0x24) { /* mov disp32(%esp),%ebp */ immv = read_int (cur->pc + 2, 4); unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv); if (cur->fp_loc == ptr) { cur->fp = cur->fp_sav; cur->fp_loc = NULL; } else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase) cur->fp = (unsigned long*) (*ptr); } } } else if (MRM_MOD (modrm) == 0x40) { if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) { if (cur->pc[1] == 0x24) { /* mov disp8(%esp),%ebp - JVM */ imm8 = ((char*) (cur->pc))[2]; unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8); if (cur->fp_loc == ptr) { cur->fp = cur->fp_sav; cur->fp_loc = NULL; } else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase) cur->fp = (unsigned long*) (*ptr); } } } else if (MRM_MOD (modrm) == 0x0) { if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP) { if (cur->pc[1] == 0x24) { /* mov (%esp),%ebp */ if (cur->fp_loc == cur->sp) { cur->fp = cur->fp_sav; cur->fp_loc = NULL; } else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) cur->fp = (unsigned long*) *cur->sp; } } } cur->pc = check_modrm (cur->pc); break; case 0x8c: /* mov Mw,Sw */ cur->pc = check_modrm (cur->pc); break; case 0x8d: /* lea Gv,M */ modrm = *cur->pc; if (MRM_REGD (modrm) == RSP) { unsigned char *pc = cur->pc; // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp): cur->regs[RSP] = (unsigned long) cur->sp; cur->regs[RBP] = (unsigned long) cur->fp; cur->pc++; int mod = (modrm >> 6) & 3; 
int r_m = modrm & 7; long val = 0; int undefRez = 0; if (mod == 0x3) val = getRegVal (cur, MRM_REGS (modrm), &undefRez); else if (r_m == 4) { // SP or R12. Decode SIB-byte. int sib = *cur->pc++; int scale = 1 << (sib >> 6); int index = X | ((sib >> 3) & 7); int base = B | (sib & 7); if (mod == 0) { if ((base & 7) == 5) { // BP or R13 if (index != 4) // SP val += getRegVal (cur, index, &undefRez) * scale; val += read_int (cur->pc, 4); cur->pc += 4; } else { val += getRegVal (cur, base, &undefRez); if (index != 4) // SP val += getRegVal (cur, index, &undefRez) * scale; } } else { val += getRegVal (cur, base, &undefRez); if (index != 4) // SP val += getRegVal (cur, index, &undefRez) * scale; if (mod == 1) { val += read_int (cur->pc, 1); cur->pc++; } else { // mod == 2 val += read_int (cur->pc, 4); cur->pc += 4; } } } else if (mod == 0) { if (r_m == 5) { // BP or R13 val += read_int (cur->pc, 4); cur->pc += 4; } else val += getRegVal (cur, MRM_REGS (modrm), &undefRez); } else { // mod == 1 || mod == 2 val += getRegVal (cur, MRM_REGS (modrm), &undefRez); if (mod == 1) { val += read_int (cur->pc, 1); cur->pc++; } else { // mod == 2 val += read_int (cur->pc, 4); cur->pc += 4; } } if (undefRez) { DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx val=0x%lx\n", __LINE__, (unsigned long) cur->pc, (unsigned long) val); goto checkFP; } cur->regs[MRM_REGD (modrm)] = val; DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n", __LINE__, (unsigned long) cur->pc, (unsigned long) val, (unsigned long) wctx->sp, (unsigned long) wctx->sbase); if (cur->pc != check_modrm (pc)) DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n", __LINE__, (unsigned long) cur->pc, (unsigned long) pc, (unsigned long) check_modrm (pc)); if (MRM_REGD (modrm) == RSP) { if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase)) { DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. 
cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n", __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val, (unsigned long) wctx->sp, (unsigned long) wctx->sbase); goto checkFP; } cur->sp = (unsigned long *) val; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; } } else cur->pc = check_modrm (cur->pc); break; case 0x8e: /* mov Sw,Ew */ cur->pc = check_modrm (cur->pc); break; case 0x8f: /* pop Ev */ cur->pc = check_modrm (cur->pc); cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0x90: /* nop */ break; case 0x91: /* xchg %eax,%ecx */ case 0x92: /* xchg %eax,%edx */ case 0x93: /* xchg %eax,%ebx */ case 0x94: /* xchg %eax,%esp XXXX */ case 0x95: /* xchg %eax,%ebp XXXX */ case 0x96: /* xchg %eax,%esi */ case 0x97: /* xchg %eax,%edi */ break; case 0x98: /* cbw/cwde */ case 0x99: /* cwd/cwq */ break; case 0x9a: /* callf Ap */ if (jmp_reg_switch_mode == 1) { struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur)); __collector_memcpy (tmpctx, cur, sizeof (*cur)); int rc = process_return (wctx, tmpctx); if (rc != RA_FAILURE) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } cur->pc += 2 + a; break; case 0x9b: /* fwait */ case 0x9c: /* pushf Fv */ case 0x9d: /* popf Fv */ case 0x9e: /* sahf */ case 0x9f: /* lahf */ break; case 0xa0: /* mov al,Ob */ case 0xa1: /* mov eax,Ov */ case 0xa2: /* mov Ob,al */ case 0xa3: /* mov Ov,eax */ cur->pc += a; break; case 0xa4: /* movsb Yb,Xb */ case 0xa5: /* movsd Yv,Xv */ case 0xa6: /* cmpsb Yb,Xb */ case 0xa7: /* cmpsd Xv,Yv */ break; case 0xa8: /* test al,Ib */ cur->pc += 1; break; case 0xa9: /* test eax,Iz */ cur->pc += z; break; case 0xaa: /* stosb Yb,%al */ case 0xab: /* stosd Yv,%eax */ case 0xac: /* lodsb %al,Xb */ case 0xad: /* lodsd %eax,Xv */ case 0xae: /* scasb %al,Yb */ case 0xaf: /* scasd %eax,Yv */ break; case 0xb0: /* mov %al,Ib */ case 0xb1: /* mov %cl,Ib */ 
case 0xb2: /* mov %dl,Ib */ case 0xb3: /* mov %bl,Ib */ case 0xb4: /* mov %ah,Ib */ case 0xb5: /* mov %ch,Ib */ case 0xb6: /* mov %dh,Ib */ case 0xb7: /* mov %bh,Ib */ cur->pc += 1; break; case 0xb8: /* mov Iv,%eax */ case 0xb9: /* mov Iv,%ecx */ case 0xba: /* mov Iv,%edx */ case 0xbb: /* mov Iv,%ebx */ case 0xbc: /* mov Iv,%esp */ case 0xbd: /* mov Iv,%rbp */ case 0xbe: /* mov Iv,%esi */ case 0xbf: /* mov Iv,%edi */ reg = OPC_REG (opcode); if (reg == RAX) cur->rax = read_int (cur->pc, v); cur->pc += v; break; case 0xc0: /* group2 Eb,Ib */ case 0xc1: /* group2 Ev,Ib */ cur->pc = check_modrm (cur->pc) + 1; break; case 0xc2: /* ret Iw */ /* In the dynamic linker we may see that * the actual return address is at sp+immv, * while sp points to the resolved address. */ { immv = read_int (cur->pc, 2); int rc = process_return (wctx, cur); if (rc != RA_FAILURE) { if (jmp_reg_switch_mode == 1) { DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__); goto checkFP; } wctx->sp += immv; if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__); DELETE_CURCTX (); } break; case 0xc3: /* ret */ { int rc = process_return (wctx, cur); if (rc != RA_FAILURE) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } if (jmp_reg_switch_mode == 1) jmp_reg_switch_pc = cur->pc; DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__); DELETE_CURCTX (); } break; case 0xc4: /* group AVX, 3-bytes VEX prefix */ { unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction int len = parse_x86_AVX_instruction (pc); if (len < 3) DELETE_CURCTX (); else { pc += len; cur->pc = pc; } } break; case 0xc5: /* group AVX, 2-bytes VEX prefix */ { unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction int len = parse_x86_AVX_instruction (pc); if (len 
< 2) DELETE_CURCTX (); else { pc += len; cur->pc = pc; } } break; case 0xc6: modrm = *cur->pc; if (modrm == 0xf8) /* xabort */ cur->pc += 2; else /* mov Eb,Ib */ cur->pc = check_modrm (cur->pc) + 1; break; case 0xc7: modrm = *cur->pc; if (modrm == 0xf8) /* xbegin */ cur->pc += v + 1; else { /* mov Ev,Iz */ extop = MRM_EXT (modrm); if (extop != 0) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__); goto checkFP; } if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX) cur->rax = read_int (cur->pc + 1, z); cur->pc = check_modrm (cur->pc) + z; } break; case 0xc8: /* enter Iw,Ib */ cur->pc += 3; break; case 0xc9: /* leave */ /* mov %ebp,%esp */ cur->sp = cur->fp; /* pop %ebp */ if (cur->fp_loc == cur->sp) { cur->fp = cur->fp_sav; cur->fp_loc = NULL; } else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase) { cur->fp = (unsigned long*) (*cur->sp); if (wctx->fp == (unsigned long) cur->sp) cur->cval = RA_FROMFP; } cur->sp += 1; if (cur->sp - RED_ZONE > cur->sp_safe) cur->sp_safe = cur->sp - RED_ZONE; break; case 0xca: /* retf Iw */ cur->pc += 2; /* XXXX process return */ break; case 0xcb: /* retf */ break; /* XXXX process return */ case 0xcc: /* int 3 */ break; case 0xcd: /* int Ib */ if (*cur->pc == 0x80) { if (cur->rax == __NR_exit) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__); DELETE_CURCTX (); break; } else if (cur->rax == __NR_rt_sigreturn) { if (jmp_reg_switch_mode == 1) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n", __LINE__); goto checkFP; } wctx->sp = (unsigned long) cur->sp; if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN); return RA_RT_SIGRETURN; } #if WSIZE(32) else if (cur->rax == __NR_sigreturn) { if (jmp_reg_switch_mode == 1) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__); goto checkFP; } wctx->sp = 
(unsigned long) cur->sp; if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN); return RA_SIGRETURN; } #endif } cur->pc += 1; break; case 0xce: /* into */ case 0xcf: /* iret */ break; case 0xd0: /* shift group2 Eb,1 */ case 0xd1: /* shift group2 Ev,1 */ case 0xd2: /* shift group2 Eb,%cl */ case 0xd3: /* shift group2 Ev,%cl */ cur->pc = check_modrm (cur->pc); break; case 0xd4: /* aam Ib */ cur->pc += 1; break; case 0xd5: /* aad Ib */ cur->pc += 1; break; case 0xd6: /* falc? */ break; case 0xd7: cur->pc = check_modrm (cur->pc); cur->pc++; break; case 0xd8: /* esc instructions */ case 0xd9: case 0xda: case 0xdb: case 0xdc: case 0xdd: case 0xde: case 0xdf: cur->pc = check_modrm (cur->pc); break; case 0xe0: /* loopne Jb */ case 0xe1: /* loope Jb */ case 0xe2: /* loop Jb */ case 0xe3: /* jcxz Jb */ imm8 = *(char*) cur->pc++; if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX)) { int tidx = 0; unsigned char *npc = cur->pc + imm8; if (is_after_ret (npc)) break; while (npc > targets[tidx]) tidx += 1; if (npc != targets[tidx]) { if (ntrg < MAXTRGTS) { for (int i = 0; i < nctx; i++) if (buf[i].tidx >= tidx) buf[i].tidx++; /* insert a new target */ for (int i = ntrg; i > tidx; i--) targets[i] = targets[i - 1]; ntrg += 1; targets[tidx++] = npc; } else DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n"); struct AdvWalkContext *new = buf + nctx; nctx += 1; __collector_memcpy (new, cur, sizeof (*new)); new->pc = npc; new->tidx = tidx; cur = new; /* advance the new context first */ continue; } } else DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n"); break; case 0xe4: case 0xe5: cur->pc = check_modrm (cur->pc); cur->pc++; break; case 0xe6: case 0xe7: cur->pc++; cur->pc = check_modrm (cur->pc); break; case 0xec: case 0xed: case 0xee: case 0xef: cur->pc = check_modrm (cur->pc); break; case 0xe8: /* call Jz (f64) */ { if (jmp_reg_switch_mode == 1) { struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur)); __collector_memcpy 
(tmpctx, cur, sizeof (*cur)); int rc = process_return (wctx, tmpctx); if (rc != RA_FAILURE) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } int immz = read_int (cur->pc, z); if (immz == 0) /* special case in PIC code */ cur->sp -= 1; cur->pc += z; } break; case 0xe9: /* jump Jz */ { int immz = read_int (cur->pc, z); unsigned char *npc = cur->pc + z + immz; if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__); DELETE_CURCTX (); break; } int tidx = 0; while (npc > targets[tidx]) tidx += 1; if (npc != targets[tidx]) { if (ntrg < MAXTRGTS) { for (int i = 0; i < nctx; i++) if (buf[i].tidx >= tidx) buf[i].tidx++; /* insert a new target */ for (int i = ntrg; i > tidx; i--) targets[i] = targets[i - 1]; ntrg += 1; targets[tidx++] = npc; } else DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n"); cur->pc = npc; cur->tidx = tidx; continue; /* advance this context first */ } else { /* Delete context */ DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__); DELETE_CURCTX (); } } break; case 0xeb: /* jump imm8 */ { imm8 = *(char*) cur->pc++; int tidx = 0; unsigned char *npc = cur->pc + imm8; while (npc > targets[tidx]) tidx += 1; if (npc != targets[tidx]) { if (ntrg < MAXTRGTS) { for (int i = 0; i < nctx; i++) if (buf[i].tidx >= tidx) buf[i].tidx++; /* insert a new target */ for (int i = ntrg; i > tidx; i--) targets[i] = targets[i - 1]; ntrg += 1; targets[tidx++] = npc; } else DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n"); cur->pc = npc; cur->tidx = tidx; continue; /* advance this context first */ } else { /* Delete context */ DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__); DELETE_CURCTX (); } } break; case 0xf0: /* lock prefix */ case 0xf2: /* repne prefix */ case 0xf3: /* repz prefix */ break; case 0xf4: /* hlt */ extop2 = *(cur->pc - 3); if (extop2 == 0x90) { // 
17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK); DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__); return RA_END_OF_STACK; } /* We see 'hlt' in _start. Stop analysis, revert to FP */ /* A workaround for the Linux main stack */ if (nctx > 1) { DELETE_CURCTX (); break; } if (cur->fp == 0) { if (jmp_reg_switch_mode == 1) { DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n"); goto checkFP; } cache_put (wctx, RA_EOSTCK); wctx->pc = 0; wctx->sp = 0; wctx->fp = 0; if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK); DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__); return RA_END_OF_STACK; } DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__); goto checkFP; case 0xf5: /* cmc */ break; case 0xf6: /* group3 Eb */ modrm = *cur->pc; extop = MRM_EXT (modrm); cur->pc = check_modrm (cur->pc); if (extop == 0x0) /* test Ib */ cur->pc += 1; break; case 0xf7: /* group3 Ev */ modrm = *cur->pc; extop = MRM_EXT (modrm); cur->pc = check_modrm (cur->pc); if (extop == 0x0) /* test Iz */ cur->pc += z; break; case 0xf8: /* clc */ case 0xf9: /* stc */ case 0xfa: /* cli */ case 0xfb: /* sti */ case 0xfc: /* cld */ case 0xfd: /* std */ break; case 0xfe: /* group4 */ modrm = *cur->pc; extop = MRM_EXT (modrm); switch (extop) { case 0x0: /* inc Eb */ case 0x1: /* dec Eb */ cur->pc = check_modrm (cur->pc); break; case 0x7: cur->pc = check_modrm (cur->pc); break; default: DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n", __LINE__, extop); DELETE_CURCTX (); break; } break; case 0xff: /* group5 */ modrm = *cur->pc; extop = MRM_EXT (modrm); switch (extop) { case 0x0: /* inc Ev */ case 0x1: /* dec Ev */ cur->pc = check_modrm (cur->pc); break; case 0x2: /* calln Ev */ if (jmp_reg_switch_mode == 1) { struct 
AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur)); __collector_memcpy (tmpctx, cur, sizeof (*cur)); int rc = process_return (wctx, tmpctx); if (rc != RA_FAILURE) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } cur->pc = check_modrm (cur->pc); break; case 0x3: /* callf Ep */ if (jmp_reg_switch_mode == 1) { struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur)); __collector_memcpy (tmpctx, cur, sizeof (*cur)); int rc = process_return (wctx, tmpctx); if (rc != RA_FAILURE) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } cur->pc = check_modrm (cur->pc); /* XXXX */ break; case 0x4: /* jumpn Ev */ /* This instruction appears in PLT or * in tail call optimization. * In both cases treat it as return. * Save jump *(reg) - switch, etc, for later use when no ctx left */ if (modrm == 0x25 || /* jumpn *disp32 */ MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */ MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */ { DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1); int rc = process_return (wctx, cur); if (rc != RA_FAILURE) { if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen) { DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n"); goto checkFP; } if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */ { // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux /* * For now, let's deal rather narrowly with this scenario. If: * - we are in the middle of an "ff e2" instruction, and * - the next instruction is undefined ( 0f 0b == ud2 ) * then test return. (Might eventually have to broaden the scope * of this fix to other registers/etc.) 
*/ if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b) { int rc = process_return_real (wctx, cur, 0); if (rc == RA_SUCCESS) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr /* * Here is another oddity. Java 9 seems to emit dynamically generated * code where a code block ends with a "jmp *reg" and then padding to a * multiple-of-16 boundary and then a bunch of 0s. In this case, let's * not continue to walk bytes since we would be walking off the end of * the instructions into ... something. Treating them as instructions * can lead to unexpected results, including SEGV. */ /* * While the general problem deserves a better solution, let's look * here only for one particular case: * 0xff 0xe7 jmp *reg * nop to bring us to a multiple-of-16 boundary * 0x0000000000000a00 something that does not look like an instruction * * A different nop might be used depending on how much padding is needed * to reach that multiple-of-16 boundary. 
We've seen two: * 0x90 one byte * 0x0f 0x1f 0x40 0x00 four bytes */ // confirm the instruction is 0xff 0xe7 if (cur->pc[0] == 0xe7) { // check for correct-length nop and find next 16-byte boundary int found_nop = 0; unsigned long long *boundary = 0; switch ((((unsigned long) (cur->pc)) & 0xf)) { case 0xb: // look for 4-byte nop if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f) found_nop = 1; boundary = (unsigned long long *) (cur->pc + 5); break; case 0xe: // look for 1-byte nop if (cur->pc[1] == 0x90) found_nop = 1; boundary = (unsigned long long *) (cur->pc + 2); break; default: break; } // if nop is found, check what's at the boundary if (found_nop && *boundary == 0x000000000a00) { DELETE_CURCTX (); break; } } DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n", cur->pc - 1); if (num_jmp_reg < expected_num_jmp_reg) { if (jmp_reg_ctx[num_jmp_reg] == NULL) jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur)); if (jmp_reg_ctx[num_jmp_reg] != NULL) __collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur)); } if (num_jmp_reg < expected_num_jmp_reg || (num_jmp_reg >= expected_num_jmp_reg && jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL && cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 1]->pc)) { num_jmp_reg++; total_num_jmp_reg++; } if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen) { int rc = process_return_real (wctx, cur, 0); if (rc == RA_SUCCESS) { if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } } DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__); DELETE_CURCTX (); break; case 0x5: /* jmpf Ep */ cur->pc = check_modrm (cur->pc); /* XXXX */ break; case 0x6: /* push Ev */ cur->pc = check_modrm (cur->pc); cur->sp -= 1; break; case 0x7: cur->pc = check_modrm (cur->pc); /* XXXX */ if (jmp_reg_switch_mode == 1) { int rc = process_return_real (wctx, cur, 0); if (rc == RA_SUCCESS) { if (save_ctx) omp_cache_put 
(cur->sp_safe, &wctx_pc_save, wctx, rc); return rc; } } break; default: DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n", __LINE__, (int) extop); DELETE_CURCTX (); break; } break; default: DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n", __LINE__, (int) opcode); DELETE_CURCTX (); break; } /* switch to next context */ if (++cur >= buf + nctx) cur = buf; DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld) nctx=%d cnt=%d\n", __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt); } checkFP: Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n", __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp, (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase, (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend); if (jmp_reg_switch_mode == 1) { // not deal with switch cases not ending with ret if (jmp_reg_switch_backup_ctx != NULL) __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur)); DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx); } unsigned long *cur_fp = cur->fp; unsigned long *cur_sp = cur->sp; if (do_walk == 0) __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext)); /* Resort to the frame pointer */ if (cur->fp_loc) cur->fp = cur->fp_sav; cur->sp = cur->fp; if ((unsigned long) cur->sp >= wctx->sbase || (unsigned long) cur->sp < wctx->sp) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. 
wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n", __LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase, (unsigned long) wctx->sp, (unsigned long) wctx->pc); if (do_walk == 0) { cur->sp = cur_sp; cur->fp = cur_fp; do_walk = 1; save_ctx = 1; goto startWalk; } if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE); return RA_FAILURE; } unsigned long fp = *cur->sp++; if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n", __LINE__, (unsigned long long) fp, cur->sp, (unsigned long) wctx->sbase, (unsigned long) wctx->pc); if (do_walk == 0) { cur->sp = cur_sp; cur->fp = cur_fp; do_walk = 1; save_ctx = 1; goto startWalk; } if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE); return RA_FAILURE; } unsigned long ra = *cur->sp++; if (ra == 0) { cache_put (wctx, RA_EOSTCK); DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc); if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK); return RA_END_OF_STACK; } unsigned long tbgn = wctx->tbgn; unsigned long tend = wctx->tend; if (ra < tbgn || ra >= tend) { // We do not know yet if update_map_segments is really needed if (!__collector_check_segment (ra, &tbgn, &tend, 0)) { DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc); if (do_walk == 0) { cur->sp = cur_sp; cur->fp = cur_fp; do_walk = 1; save_ctx = 1; goto startWalk; } if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE); return RA_FAILURE; } } unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend); if (npc == 0) { DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. 
wctx->pc = 0x%lx\n", wctx->pc); if (do_walk == 0) { cur->sp = cur_sp; cur->fp = cur_fp; do_walk = 1; save_ctx = 1; goto startWalk; } if (save_ctx) omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE); return RA_FAILURE; } wctx->pc = npc; wctx->sp = (unsigned long) cur->sp; wctx->fp = fp; wctx->tbgn = tbgn; wctx->tend = tend; if (save_ctx) { omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS); DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc); } return RA_SUCCESS; } /* * We have the return address, but we would like to report to the user * the calling PC, which is the instruction immediately preceding the * return address. Unfortunately, x86 instructions can have variable * length. So we back up 8 bytes and try to figure out where the * calling PC starts. (FWIW, call instructions are often 5-bytes long.) */ unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend) { unsigned long npc = 0; int i = segoff < 8 ? 
segoff : 8; for (; i > 1; i--) { unsigned char *ptr = (unsigned char*) ra - i; int z = 4; int a = 4; int done = 0; int bVal; while (!done) { bVal = getByteInstruction (ptr); if (bVal < 0) return 0; switch (bVal) { case 0x26: case 0x36: #if WSIZE(64) ptr += 1; break; #endif case 0x64: case 0x65: bVal = getByteInstruction (ptr + 1); if (bVal < 0) return 0; if (bVal == 0xe8) // a workaround for bug 16193041, assuming "call Jz" has no segment override prefix done = 1; else ptr += 1; break; case 0x66: z = 2; ptr += 1; break; case 0x67: a = 2; ptr += 1; break; default: done = 1; break; } } #if WSIZE(64) bVal = getByteInstruction (ptr); if (bVal < 0) return 0; if (bVal >= 0x40 && bVal <= 0x4f) { /* XXXX not all REX codes applicable */ if (bVal & 0x8) z = 4; ptr += 1; } #endif int opcode = getByteInstruction (ptr); if (opcode < 0) return 0; ptr++; switch (opcode) { case 0xe8: /* call Jz (f64) */ ptr += z; break; case 0x9a: /* callf Ap */ ptr += 2 + a; break; case 0xff: /* calln Ev , callf Ep */ { int extop = MRM_EXT (*ptr); if (extop == 2 || extop == 3) ptr = check_modrm (ptr); } break; default: continue; } if ((unsigned long) ptr == ra) { npc = ra - i; break; } } if (npc == 0) { unsigned char * ptr = (unsigned char *) ra; #if WSIZE(32) // test __kernel_sigreturn or __kernel_rt_sigreturn if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58 && getByteInstruction (ptr + 1) == 0xb8 && getByteInstruction (ptr + 6) == 0xcd && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */ || (ra + 7 < tend && getByteInstruction (ptr) == 0x58 && getByteInstruction (ptr + 1) == 0xb8 && getByteInstruction (ptr + 6) == 0x0f && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */ || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8 && getByteInstruction (ptr + 5) == 0xcd && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */ || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8 && getByteInstruction (ptr + 5) == 0x0f && 
getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */ #else //WSIZE(64) // test __restore_rt if (ra + 8 < tend && getByteInstruction (ptr) == 0x48 && getByteInstruction (ptr + 7) == 0x0f && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */ #endif { npc = ra; } } if (npc == 0 && __collector_java_mode && __collector_java_asyncgetcalltrace_loaded) { // detect jvm interpreter code for java user threads unsigned char * ptr = (unsigned char *) ra; #if WSIZE(32) // up to J170 /* * ff 24 9d e0 64 02 f5 jmp *-0xafd9b20(,%ebx,4) * 8b 4e 01 movl 1(%esi),%ecx * f7 d1 notl %ecx * 8b 5d ec movl -0x14(%ebp),%ebx * c1 e1 02 shll $2,%ecx * eb d8 jmp .-0x26 [ 0x92a ] * 83 ec 08 subl $8,%esp || 8b 65 f8 movl -8(%ebp),%esp * */ if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65)) && *(ptr - 2) == 0xeb && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1 && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d) { npc = ra - 20; } // J180 J190 // ff 24 9d ** ** ** ** jmp *-0x*******(,%ebx,4) if (npc == 0 && ra - 7 >= (ra - segoff) && *(ptr - 7) == 0xff && *(ptr - 6) == 0x24 && *(ptr - 5) == 0x9d) { npc = ra - 7; } #else //WSIZE(64) // up to J170 /* * 41 ff 24 da jmp *(%r10,%rbx,8) * 41 8b 4d 01 movl 1(%r13),%ecx * f7 d1 notl %ecx * 48 8b 5d d8 movq -0x28(%rbp),%rbx * c1 e1 02 shll $2,%ecx * eb cc jmp .-0x32 [ 0xd23 ] * 48 8b 65 f0 movq -0x10(%rbp),%rsp */ if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec)) && *(ptr - 2) == 0xeb && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1 && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1 && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d && *(ptr - 19) == 0x41 
&& *(ptr - 18) == 0xff) npc = ra - 19; // J180 J190 // 41 ff 24 da jmp *(%r10,%rbx,8) if (npc == 0 && ra - 4 >= (ra - segoff) && *(ptr - 4) == 0x41 && *(ptr - 3) == 0xff && *(ptr - 2) == 0x24 && *(ptr - 1) == 0xda) npc = ra - 4; #endif } return npc; } /* * Parses AVX instruction and returns its length. * Returns 0 if parsing failed. * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf */ static int parse_x86_AVX_instruction (unsigned char *pc) { /* * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4). * If an instruction syntax can be encoded using the two-byte form, * it can also be encoded using the three byte form of VEX. * The latter increases the length of the instruction by one byte. * This may be helpful in some situations for code alignment. * Byte 0 Byte 1 Byte 2 Byte 3 (Bit Position) 7 0 7 6 5 4 0 7 6 3 2 10 3-byte VEX [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ] 7 0 7 6 3 2 10 2-byte VEX [ 11000101 ] [ R | vvvv | L | pp ] 7 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 4-byte EVEX [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ] R: REX.R in 1's complement (inverted) form 0: Same as REX.R=1 (64-bit mode only) 1: Same as REX.R=0 (must be 1 in 32-bit mode) X: REX.X in 1's complement (inverted) form 0: Same as REX.X=1 (64-bit mode only) 1: Same as REX.X=0 (must be 1 in 32-bit mode) B: REX.B in 1's complement (inverted) form 0: Same as REX.B=1 (64-bit mode only) 1: Same as REX.B=0 (Ignored in 32-bit mode). W: opcode specific (use like REX.W, or used for opcode extension, or ignored, depending on the opcode byte) m-mmmm: 00000: Reserved for future use (will #UD) 00001: implied 0F leading opcode byte 00010: implied 0F 38 leading opcode bytes 00011: implied 0F 3A leading opcode bytes 00100-11111: Reserved for future use (will #UD) vvvv: a register specifier (in 1's complement form) or 1111 if unused. 
L: Vector Length 0: scalar or 128-bit vector 1: 256-bit vector pp: opcode extension providing equivalent functionality of a SIMD prefix 00: None 01: 66 10: F3 11: F2 * * Example: 0xc5f877L vzeroupper * VEX prefix: 0xc5 0x77 * Opcode: 0xf8 * */ int len = 0; disassemble_info dis_info; dis_info.arch = bfd_arch_i386; dis_info.mach = bfd_mach_x86_64; dis_info.flavour = bfd_target_unknown_flavour; dis_info.endian = BFD_ENDIAN_UNKNOWN; dis_info.endian_code = dis_info.endian; dis_info.octets_per_byte = 1; dis_info.disassembler_needs_relocs = FALSE; dis_info.fprintf_func = fprintf_func; dis_info.fprintf_styled_func = fprintf_styled_func; dis_info.stream = NULL; dis_info.disassembler_options = NULL; dis_info.read_memory_func = read_memory_func; dis_info.memory_error_func = memory_error_func; dis_info.print_address_func = print_address_func; dis_info.symbol_at_address_func = symbol_at_address_func; dis_info.symbol_is_valid = symbol_is_valid; dis_info.display_endian = BFD_ENDIAN_UNKNOWN; dis_info.symtab = NULL; dis_info.symtab_size = 0; dis_info.buffer_vma = 0; dis_info.buffer = pc; dis_info.buffer_length = 8; disassembler_ftype disassemble = print_insn_i386; if (disassemble == NULL) { DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n"); return 0; } len = disassemble (0, &dis_info); DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d pc: %p\n", len, pc); return len; } /* * In the Intel world, a stack frame looks like this: * * %fp0->| | * |-------------------------------| * | Args to next subroutine | * |-------------------------------|-\ * %sp0->| One word struct-ret address | | * |-------------------------------| > minimum stack frame (8 bytes) * | Previous frame pointer (%fp0)| | * %fp1->|-------------------------------|-/ * | Local variables | * %sp1->|-------------------------------| * */ int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode) { long *lbuf = (long*) buf; int lsize = 
size / sizeof (long); int ind = 0; int do_walk = 1; int extra_frame = 0; if (mode & FRINFO_NO_WALK) do_walk = 0; if ((mode & 0xffff) == FRINFO_FROM_STACK) extra_frame = 1; /* * trace the stack frames from user stack. * We are assuming that the frame pointer and return address * are null when we are at the top level. */ struct WalkContext wctx; wctx.pc = GET_PC (context); wctx.sp = GET_SP (context); wctx.fp = GET_FP (context); wctx.ln = (unsigned long) context->uc_link; unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key); if (sbase && *sbase > wctx.sp) wctx.sbase = *sbase; else { wctx.sbase = wctx.sp + 0x100000; if (wctx.sbase < wctx.sp) /* overflow */ wctx.sbase = (unsigned long) - 1; } // We do not know yet if update_map_segments is really needed __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0); for (;;) { if (ind >= lsize || wctx.pc == 0) break; if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2) { lbuf[0] = wctx.pc; if (ind == 0) { ind++; if (ind >= lsize) break; } } if (bptr == NULL || wctx.sp > (unsigned long) bptr) { lbuf[ind++] = wctx.pc; if (ind >= lsize) break; } for (;;) { if (eptr != NULL && wctx.sp >= (unsigned long) eptr) { ind = ind >= 2 ? 
ind - 2 : 0; goto exit; } int ret = find_i386_ret_addr (&wctx, do_walk); DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret); if (ret == RA_FAILURE) { /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */ goto exit; } if (ret == RA_END_OF_STACK) goto exit; #if WSIZE(32) if (ret == RA_RT_SIGRETURN) { struct SigFrame { unsigned long arg0; unsigned long arg1; unsigned long arg2; } *sframe = (struct SigFrame*) wctx.sp; ucontext_t *ncontext = (ucontext_t*) sframe->arg2; wctx.pc = GET_PC (ncontext); if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0)) { /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */ goto exit; } unsigned long nsp = GET_SP (ncontext); /* Check the new stack pointer */ if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024) { /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */ goto exit; } wctx.sp = nsp; wctx.fp = GET_FP (ncontext); break; } else if (ret == RA_SIGRETURN) { struct sigcontext *sctx = (struct sigcontext*) wctx.sp; wctx.pc = sctx->eip; if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0)) { /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */ goto exit; } wctx.sp = sctx->esp; wctx.fp = sctx->ebp; break; } #elif WSIZE(64) if (ret == RA_RT_SIGRETURN) { ucontext_t *ncontext = (ucontext_t*) wctx.sp; wctx.pc = GET_PC (ncontext); if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0)) { /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */ goto exit; } unsigned long nsp = GET_SP (ncontext); /* Check the new stack pointer */ if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024) { /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */ goto exit; } wctx.sp = nsp; wctx.fp = GET_FP (ncontext); break; } #endif /* WSIZE() */ if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2) { lbuf[0] = wctx.pc; if (ind == 0) { ind++; if (ind >= lsize) break; } } if (bptr == NULL || wctx.sp > (unsigned long) bptr) { lbuf[ind++] = wctx.pc; if (ind >= lsize) goto 
exit; } } } exit: #if defined(DEBUG) if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0) { DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind); for (int i = 0; i < ind; i++) DprintfT (SP_DUMP_UNWIND, " %3d: 0x%lx\n", i, (unsigned long) lbuf[i]); } #endif dump_stack (__LINE__); if (ind >= lsize) { ind = lsize - 1; lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER; } return ind * sizeof (long); } #elif ARCH(Aarch64) static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode) { if (buf && bptr && eptr && context && size + mode > 0) getByteInstruction ((unsigned char *) eptr); int ind = 0; __u64 *lbuf = (void *) buf; int lsize = size / sizeof (__u64); __u64 pc = context->uc_mcontext.pc; __u64 sp = context->uc_mcontext.sp; __u64 stack_base; unsigned long tbgn = 0; unsigned long tend = 0; unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key); if (sbase && *sbase > sp) stack_base = *sbase; else { stack_base = sp + 0x100000; if (stack_base < sp) // overflow stack_base = (__u64) -1; } DprintfT (SP_DUMP_UNWIND, "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx stack_base=0x%llx\n", __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp, (unsigned long long) stack_base); while (sp && pc) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx\n", __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp); // Dl_info dlinfo; // if (!dladdr ((void *) pc, &dlinfo)) // break; // DprintfT (SP_DUMP_UNWIND, "%2d: %llx <%s+%llu> (%s)\n", // ind, (unsigned long long) pc, // dlinfo.dli_sname ? 
dlinfo.dli_sname : "(?)", // (unsigned long long) pc - (unsigned long long) dlinfo.dli_saddr, // dlinfo.dli_fname); lbuf[ind++] = pc; if (ind >= lsize || sp >= stack_base || (sp & 15) != 0) break; if (pc < tbgn || pc >= tend) if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0)) { DprintfT (SP_DUMP_UNWIND, "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n", __LINE__, (unsigned long) sp); break; } pc = ((__u64 *) sp)[1]; __u64 old_sp = sp; sp = ((__u64 *) sp)[0]; if (sp < old_sp) break; } if (ind >= lsize) { ind = lsize - 1; lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER; } return ind * sizeof (__u64); } #endif /* ARCH() */