diff options
author | Jakub Jelinek <jakub@redhat.com> | 2023-11-15 12:45:58 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2023-11-15 12:45:58 +0100 |
commit | 28219f7f99a80519d1c6ab5e5dc83b4c7f8d7251 (patch) | |
tree | 42e3657c58ff08a654f04aeb0f43b3bc75930bbc /libsanitizer/interception | |
parent | 4d86dc51e34d2a5695b617afeb56e3414836a79a (diff) | |
download | gcc-28219f7f99a80519d1c6ab5e5dc83b4c7f8d7251.zip gcc-28219f7f99a80519d1c6ab5e5dc83b4c7f8d7251.tar.gz gcc-28219f7f99a80519d1c6ab5e5dc83b4c7f8d7251.tar.bz2 |
libsanitizer: merge from upstream (c425db2eb558c263)
The following patch is the result of running libsanitizer/merge.sh
from c425db2eb558c263 (yesterday evening).
Bootstrapped/regtested on x86_64-linux and i686-linux (together with
the follow-up 3 patches I'm about to post).
BTW, it seems upstream has added riscv64 support for (I think) lsan/tsan,
so if anyone is willing to try it there, it would be a matter of
copying e.g. the s390*-*-linux* libsanitizer/configure.tgt entry
to riscv64-*-linux* with the obvious s/s390x/riscv64/ change in it.
Diffstat (limited to 'libsanitizer/interception')
-rw-r--r-- | libsanitizer/interception/interception.h | 200 | ||||
-rw-r--r-- | libsanitizer/interception/interception_linux.cpp | 16 | ||||
-rw-r--r-- | libsanitizer/interception/interception_linux.h | 18 | ||||
-rw-r--r-- | libsanitizer/interception/interception_win.cpp | 92 | ||||
-rw-r--r-- | libsanitizer/interception/interception_win.h | 5 |
5 files changed, 234 insertions, 97 deletions
diff --git a/libsanitizer/interception/interception.h b/libsanitizer/interception/interception.h index d97974e..069f73d 100644 --- a/libsanitizer/interception/interception.h +++ b/libsanitizer/interception/interception.h @@ -14,9 +14,10 @@ #ifndef INTERCEPTION_H #define INTERCEPTION_H +#include "sanitizer_common/sanitizer_asm.h" #include "sanitizer_common/sanitizer_internal_defs.h" -#if !SANITIZER_LINUX && !SANITIZER_FREEBSD && !SANITIZER_APPLE && \ +#if !SANITIZER_LINUX && !SANITIZER_FREEBSD && !SANITIZER_APPLE && \ !SANITIZER_NETBSD && !SANITIZER_WINDOWS && !SANITIZER_FUCHSIA && \ !SANITIZER_SOLARIS # error "Interception doesn't work on this operating system." @@ -67,24 +68,50 @@ typedef __sanitizer::OFF64_T OFF64_T; // for more details). To intercept such functions you need to use the // INTERCEPTOR_WITH_SUFFIX(...) macro. -// How it works: -// To replace system functions on Linux we just need to declare functions -// with same names in our library and then obtain the real function pointers +// How it works on Linux +// --------------------- +// +// To replace system functions on Linux we just need to declare functions with +// the same names in our library and then obtain the real function pointers // using dlsym(). -// There is one complication. A user may also intercept some of the functions -// we intercept. To resolve this we declare our interceptors with __interceptor_ -// prefix, and then make actual interceptors weak aliases to __interceptor_ -// functions. // -// This is not so on Mac OS, where the two-level namespace makes -// our replacement functions invisible to other libraries. This may be overcomed -// using the DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared -// libraries in Chromium were noticed when doing so. +// There is one complication: a user may also intercept some of the functions we +// intercept. 
To allow for up to 3 interceptors (including ours) of a given +// function "func", the interceptor implementation is in ___interceptor_func, +// which is aliased by a weak function __interceptor_func, which in turn is +// aliased (via a trampoline) by weak wrapper function "func". +// +// Most user interceptors should define a foreign interceptor as follows: +// +// - provide a non-weak function "func" that performs interception; +// - if __interceptor_func exists, call it to perform the real functionality; +// - if it does not exist, figure out the real function and call it instead. +// +// In rare cases, a foreign interceptor (of another dynamic analysis runtime) +// may be defined as follows (on supported architectures): +// +// - provide a non-weak function __interceptor_func that performs interception; +// - if ___interceptor_func exists, call it to perform the real functionality; +// - if it does not exist, figure out the real function and call it instead; +// - provide a weak function "func" that is an alias to __interceptor_func. +// +// With this protocol, sanitizer interceptors, foreign user interceptors, and +// foreign interceptors of other dynamic analysis runtimes, or any combination +// thereof, may co-exist simultaneously. +// +// How it works on Mac OS +// ---------------------- +// +// This is not so on Mac OS, where the two-level namespace makes our replacement +// functions invisible to other libraries. This may be overcomed using the +// DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared libraries in +// Chromium were noticed when doing so. +// // Instead we create a dylib containing a __DATA,__interpose section that // associates library functions with their wrappers. When this dylib is -// preloaded before an executable using DYLD_INSERT_LIBRARIES, it routes all -// the calls to interposed functions done through stubs to the wrapper -// functions. 
+// preloaded before an executable using DYLD_INSERT_LIBRARIES, it routes all the +// calls to interposed functions done through stubs to the wrapper functions. +// // As it's decided at compile time which functions are to be intercepted on Mac, // INTERCEPT_FUNCTION() is effectively a no-op on this system. @@ -100,53 +127,102 @@ struct interpose_substitution { // For a function foo() create a global pair of pointers { wrap_foo, foo } in // the __DATA,__interpose section. // As a result all the calls to foo() will be routed to wrap_foo() at runtime. -#define INTERPOSER(func_name) __attribute__((used)) \ +#define INTERPOSER(func_name) __attribute__((used)) \ const interpose_substitution substitution_##func_name[] \ __attribute__((section("__DATA, __interpose"))) = { \ - { reinterpret_cast<const uptr>(WRAP(func_name)), \ - reinterpret_cast<const uptr>(func_name) } \ + { reinterpret_cast<const uptr>(WRAP(func_name)), \ + reinterpret_cast<const uptr>(func_name) } \ } // For a function foo() and a wrapper function bar() create a global pair // of pointers { bar, foo } in the __DATA,__interpose section. // As a result all the calls to foo() will be routed to bar() at runtime. #define INTERPOSER_2(func_name, wrapper_name) __attribute__((used)) \ -const interpose_substitution substitution_##func_name[] \ - __attribute__((section("__DATA, __interpose"))) = { \ - { reinterpret_cast<const uptr>(wrapper_name), \ - reinterpret_cast<const uptr>(func_name) } \ +const interpose_substitution substitution_##func_name[] \ + __attribute__((section("__DATA, __interpose"))) = { \ + { reinterpret_cast<const uptr>(wrapper_name), \ + reinterpret_cast<const uptr>(func_name) } \ } # define WRAP(x) wrap_##x -# define WRAPPER_NAME(x) "wrap_"#x +# define TRAMPOLINE(x) WRAP(x) # define INTERCEPTOR_ATTRIBUTE # define DECLARE_WRAPPER(ret_type, func, ...) 
#elif SANITIZER_WINDOWS # define WRAP(x) __asan_wrap_##x -# define WRAPPER_NAME(x) "__asan_wrap_"#x +# define TRAMPOLINE(x) WRAP(x) # define INTERCEPTOR_ATTRIBUTE __declspec(dllexport) -# define DECLARE_WRAPPER(ret_type, func, ...) \ +# define DECLARE_WRAPPER(ret_type, func, ...) \ extern "C" ret_type func(__VA_ARGS__); -# define DECLARE_WRAPPER_WINAPI(ret_type, func, ...) \ +# define DECLARE_WRAPPER_WINAPI(ret_type, func, ...) \ extern "C" __declspec(dllimport) ret_type __stdcall func(__VA_ARGS__); -#elif SANITIZER_FREEBSD || SANITIZER_NETBSD -# define WRAP(x) __interceptor_ ## x -# define WRAPPER_NAME(x) "__interceptor_" #x +#elif !SANITIZER_FUCHSIA // LINUX, FREEBSD, NETBSD, SOLARIS # define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default"))) +# if ASM_INTERCEPTOR_TRAMPOLINE_SUPPORT +// Weak aliases of weak aliases do not work, therefore we need to set up a +// trampoline function. The function "func" is a weak alias to the trampoline +// (so that we may check if "func" was overridden), which calls the weak +// function __interceptor_func, which in turn aliases the actual interceptor +// implementation ___interceptor_func: +// +// [wrapper "func": weak] --(alias)--> [TRAMPOLINE(func)] +// | +// +--------(tail call)-------+ +// | +// v +// [__interceptor_func: weak] --(alias)--> [WRAP(func)] +// +// We use inline assembly to define most of this, because not all compilers +// support functions with the "naked" attribute with every architecture. +# define WRAP(x) ___interceptor_ ## x +# define TRAMPOLINE(x) __interceptor_trampoline_ ## x +# if SANITIZER_FREEBSD || SANITIZER_NETBSD // FreeBSD's dynamic linker (incompliantly) gives non-weak symbols higher // priority than weak ones so weak aliases won't work for indirect calls // in position-independent (-fPIC / -fPIE) mode. -# define DECLARE_WRAPPER(ret_type, func, ...) 
\ - extern "C" ret_type func(__VA_ARGS__) \ - __attribute__((alias("__interceptor_" #func), visibility("default"))); -#elif !SANITIZER_FUCHSIA -# define WRAP(x) __interceptor_ ## x -# define WRAPPER_NAME(x) "__interceptor_" #x -# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default"))) -# define DECLARE_WRAPPER(ret_type, func, ...) \ - extern "C" ret_type func(__VA_ARGS__) \ - __attribute__((weak, alias("__interceptor_" #func), visibility("default"))); +# define __ASM_WEAK_WRAPPER(func) ".globl " #func "\n" +# else +# define __ASM_WEAK_WRAPPER(func) ".weak " #func "\n" +# endif // SANITIZER_FREEBSD || SANITIZER_NETBSD +// Keep trampoline implementation in sync with sanitizer_common/sanitizer_asm.h +# define DECLARE_WRAPPER(ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__); \ + extern "C" ret_type TRAMPOLINE(func)(__VA_ARGS__); \ + extern "C" ret_type __interceptor_##func(__VA_ARGS__) \ + INTERCEPTOR_ATTRIBUTE __attribute__((weak)) ALIAS(WRAP(func)); \ + asm( \ + ".text\n" \ + __ASM_WEAK_WRAPPER(func) \ + ".set " #func ", " SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \ + ".globl " SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \ + ".type " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", %function\n" \ + SANITIZER_STRINGIFY(TRAMPOLINE(func)) ":\n" \ + SANITIZER_STRINGIFY(CFI_STARTPROC) "\n" \ + SANITIZER_STRINGIFY(ASM_TAIL_CALL) " __interceptor_" \ + SANITIZER_STRINGIFY(ASM_PREEMPTIBLE_SYM(func)) "\n" \ + SANITIZER_STRINGIFY(CFI_ENDPROC) "\n" \ + ".size " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", " \ + ".-" SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \ + ); +# else // ASM_INTERCEPTOR_TRAMPOLINE_SUPPORT +// Some architectures cannot implement efficient interceptor trampolines with +// just a plain jump due to complexities of resolving a preemptible symbol. 
In +// those cases, revert to just this scheme: +// +// [wrapper "func": weak] --(alias)--> [WRAP(func)] +// +# define WRAP(x) __interceptor_ ## x +# define TRAMPOLINE(x) WRAP(x) +# if SANITIZER_FREEBSD || SANITIZER_NETBSD +# define __ATTRIBUTE_WEAK_WRAPPER +# else +# define __ATTRIBUTE_WEAK_WRAPPER __attribute__((weak)) +# endif // SANITIZER_FREEBSD || SANITIZER_NETBSD +# define DECLARE_WRAPPER(ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__) \ + INTERCEPTOR_ATTRIBUTE __ATTRIBUTE_WEAK_WRAPPER ALIAS(WRAP(func)); +# endif // ASM_INTERCEPTOR_TRAMPOLINE_SUPPORT #endif #if SANITIZER_FUCHSIA @@ -162,10 +238,10 @@ const interpose_substitution substitution_##func_name[] \ # define REAL(x) __interception::PTR_TO_REAL(x) # define FUNC_TYPE(x) x##_type -# define DECLARE_REAL(ret_type, func, ...) \ +# define DECLARE_REAL(ret_type, func, ...) \ typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \ - namespace __interception { \ - extern FUNC_TYPE(func) PTR_TO_REAL(func); \ + namespace __interception { \ + extern FUNC_TYPE(func) PTR_TO_REAL(func); \ } # define ASSIGN_REAL(dst, src) REAL(dst) = REAL(src) #else // SANITIZER_APPLE @@ -176,14 +252,16 @@ const interpose_substitution substitution_##func_name[] \ #endif // SANITIZER_APPLE #if !SANITIZER_FUCHSIA -# define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) \ +# define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) \ DECLARE_REAL(ret_type, func, __VA_ARGS__) \ + extern "C" ret_type TRAMPOLINE(func)(__VA_ARGS__); \ extern "C" ret_type WRAP(func)(__VA_ARGS__); // Declare an interceptor and its wrapper defined in a different translation // unit (ex. asm). -# define DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(ret_type, func, ...) \ - extern "C" ret_type WRAP(func)(__VA_ARGS__); \ - extern "C" ret_type func(__VA_ARGS__); +# define DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(ret_type, func, ...) 
\ + extern "C" ret_type TRAMPOLINE(func)(__VA_ARGS__); \ + extern "C" ret_type WRAP(func)(__VA_ARGS__); \ + extern "C" ret_type func(__VA_ARGS__); #else # define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) # define DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(ret_type, func, ...) @@ -215,12 +293,10 @@ const interpose_substitution substitution_##func_name[] \ #elif !SANITIZER_APPLE -#define INTERCEPTOR(ret_type, func, ...) \ - DEFINE_REAL(ret_type, func, __VA_ARGS__) \ - DECLARE_WRAPPER(ret_type, func, __VA_ARGS__) \ - extern "C" \ - INTERCEPTOR_ATTRIBUTE \ - ret_type WRAP(func)(__VA_ARGS__) +#define INTERCEPTOR(ret_type, func, ...) \ + DEFINE_REAL(ret_type, func, __VA_ARGS__) \ + DECLARE_WRAPPER(ret_type, func, __VA_ARGS__) \ + extern "C" INTERCEPTOR_ATTRIBUTE ret_type WRAP(func)(__VA_ARGS__) // We don't need INTERCEPTOR_WITH_SUFFIX on non-Darwin for now. #define INTERCEPTOR_WITH_SUFFIX(ret_type, func, ...) \ @@ -228,10 +304,10 @@ const interpose_substitution substitution_##func_name[] \ #else // SANITIZER_APPLE -#define INTERCEPTOR_ZZZ(suffix, ret_type, func, ...) \ - extern "C" ret_type func(__VA_ARGS__) suffix; \ - extern "C" ret_type WRAP(func)(__VA_ARGS__); \ - INTERPOSER(func); \ +#define INTERCEPTOR_ZZZ(suffix, ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__) suffix; \ + extern "C" ret_type WRAP(func)(__VA_ARGS__); \ + INTERPOSER(func); \ extern "C" INTERCEPTOR_ATTRIBUTE ret_type WRAP(func)(__VA_ARGS__) #define INTERCEPTOR(ret_type, func, ...) \ @@ -246,14 +322,12 @@ const interpose_substitution substitution_##func_name[] \ #endif #if SANITIZER_WINDOWS -# define INTERCEPTOR_WINAPI(ret_type, func, ...) \ +# define INTERCEPTOR_WINAPI(ret_type, func, ...) 
\ typedef ret_type (__stdcall *FUNC_TYPE(func))(__VA_ARGS__); \ - namespace __interception { \ - FUNC_TYPE(func) PTR_TO_REAL(func); \ - } \ - extern "C" \ - INTERCEPTOR_ATTRIBUTE \ - ret_type __stdcall WRAP(func)(__VA_ARGS__) + namespace __interception { \ + FUNC_TYPE(func) PTR_TO_REAL(func); \ + } \ + extern "C" INTERCEPTOR_ATTRIBUTE ret_type __stdcall WRAP(func)(__VA_ARGS__) #endif // ISO C++ forbids casting between pointer-to-function and pointer-to-object, diff --git a/libsanitizer/interception/interception_linux.cpp b/libsanitizer/interception/interception_linux.cpp index 5111a87..ef8136e 100644 --- a/libsanitizer/interception/interception_linux.cpp +++ b/libsanitizer/interception/interception_linux.cpp @@ -33,7 +33,7 @@ static int StrCmp(const char *s1, const char *s2) { } #endif -static void *GetFuncAddr(const char *name, uptr wrapper_addr) { +static void *GetFuncAddr(const char *name, uptr trampoline) { #if SANITIZER_NETBSD // FIXME: Find a better way to handle renames if (StrCmp(name, "sigaction")) @@ -50,17 +50,17 @@ static void *GetFuncAddr(const char *name, uptr wrapper_addr) { // In case `name' is not loaded, dlsym ends up finding the actual wrapper. // We don't want to intercept the wrapper and have it point to itself. - if ((uptr)addr == wrapper_addr) + if ((uptr)addr == trampoline) addr = nullptr; } return addr; } bool InterceptFunction(const char *name, uptr *ptr_to_real, uptr func, - uptr wrapper) { - void *addr = GetFuncAddr(name, wrapper); + uptr trampoline) { + void *addr = GetFuncAddr(name, trampoline); *ptr_to_real = (uptr)addr; - return addr && (func == wrapper); + return addr && (func == trampoline); } // dlvsym is a GNU extension supported by some other platforms. 
@@ -70,12 +70,12 @@ static void *GetFuncAddr(const char *name, const char *ver) { } bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real, - uptr func, uptr wrapper) { + uptr func, uptr trampoline) { void *addr = GetFuncAddr(name, ver); *ptr_to_real = (uptr)addr; - return addr && (func == wrapper); + return addr && (func == trampoline); } -#endif // SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD +# endif // SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD } // namespace __interception diff --git a/libsanitizer/interception/interception_linux.h b/libsanitizer/interception/interception_linux.h index a08f8cb..433a3d9 100644 --- a/libsanitizer/interception/interception_linux.h +++ b/libsanitizer/interception/interception_linux.h @@ -15,7 +15,7 @@ SANITIZER_SOLARIS #if !defined(INCLUDED_FROM_INTERCEPTION_LIB) -# error "interception_linux.h should be included from interception library only" +# error interception_linux.h should be included from interception library only #endif #ifndef INTERCEPTION_LINUX_H @@ -23,26 +23,26 @@ namespace __interception { bool InterceptFunction(const char *name, uptr *ptr_to_real, uptr func, - uptr wrapper); + uptr trampoline); bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real, - uptr func, uptr wrapper); + uptr func, uptr trampoline); } // namespace __interception #define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) \ ::__interception::InterceptFunction( \ #func, \ - (::__interception::uptr *) & REAL(func), \ - (::__interception::uptr) & (func), \ - (::__interception::uptr) & WRAP(func)) + (::__interception::uptr *)&REAL(func), \ + (::__interception::uptr)&(func), \ + (::__interception::uptr)&TRAMPOLINE(func)) // dlvsym is a GNU extension supported by some other platforms. 
#if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD #define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \ ::__interception::InterceptFunction( \ #func, symver, \ - (::__interception::uptr *) & REAL(func), \ - (::__interception::uptr) & (func), \ - (::__interception::uptr) & WRAP(func)) + (::__interception::uptr *)&REAL(func), \ + (::__interception::uptr)&(func), \ + (::__interception::uptr)&TRAMPOLINE(func)) #else #define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \ INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) diff --git a/libsanitizer/interception/interception_win.cpp b/libsanitizer/interception/interception_win.cpp index faaa8ee..1b681ad 100644 --- a/libsanitizer/interception/interception_win.cpp +++ b/libsanitizer/interception/interception_win.cpp @@ -1,4 +1,4 @@ -//===-- interception_linux.cpp ----------------------------------*- C++ -*-===// +//===-- interception_win.cpp ------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -141,8 +141,29 @@ static const int kBranchLength = FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength); static const int kDirectBranchLength = kBranchLength + kAddressLength; +# if defined(_MSC_VER) +# define INTERCEPTION_FORMAT(f, a) +# else +# define INTERCEPTION_FORMAT(f, a) __attribute__((format(printf, f, a))) +# endif + +static void (*ErrorReportCallback)(const char *format, ...) + INTERCEPTION_FORMAT(1, 2); + +void SetErrorReportCallback(void (*callback)(const char *format, ...)) { + ErrorReportCallback = callback; +} + +# define ReportError(...) \ + do { \ + if (ErrorReportCallback) \ + ErrorReportCallback(__VA_ARGS__); \ + } while (0) + static void InterceptionFailed() { - // Do we have a good way to abort with an error message here? 
+ ReportError("interception_win: failed due to an unrecoverable error.\n"); + // This acts like an abort when no debugger is attached. According to an old + // comment, calling abort() leads to an infinite recursion in CheckFailed. __debugbreak(); } @@ -249,8 +270,13 @@ static void WritePadding(uptr from, uptr size) { } static void WriteJumpInstruction(uptr from, uptr target) { - if (!DistanceIsWithin2Gig(from + kJumpInstructionLength, target)) + if (!DistanceIsWithin2Gig(from + kJumpInstructionLength, target)) { + ReportError( + "interception_win: cannot write jmp further than 2GB away, from %p to " + "%p.\n", + (void *)from, (void *)target); InterceptionFailed(); + } ptrdiff_t offset = target - from - kJumpInstructionLength; *(u8*)from = 0xE9; *(u32*)(from + 1) = offset; @@ -274,6 +300,10 @@ static void WriteIndirectJumpInstruction(uptr from, uptr indirect_target) { int offset = indirect_target - from - kIndirectJumpInstructionLength; if (!DistanceIsWithin2Gig(from + kIndirectJumpInstructionLength, indirect_target)) { + ReportError( + "interception_win: cannot write indirect jmp with target further than " + "2GB away, from %p to %p.\n", + (void *)from, (void *)indirect_target); InterceptionFailed(); } *(u16*)from = 0x25FF; @@ -427,6 +457,11 @@ static const u8 kPrologueWithShortJump2[] = { // Returns 0 on error. static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { +#if SANITIZER_ARM64 + // An ARM64 instruction is 4 bytes long. 
+ return 4; +#endif + #if SANITIZER_WINDOWS64 if (memcmp((u8*)address, kPrologueWithShortJump1, sizeof(kPrologueWithShortJump1)) == 0 || @@ -492,6 +527,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0xFF8B: // 8B FF : mov edi, edi case 0xEC8B: // 8B EC : mov ebp, esp case 0xc889: // 89 C8 : mov eax, ecx + case 0xE589: // 89 E5 : mov ebp, esp case 0xC18B: // 8B C1 : mov eax, ecx case 0xC033: // 33 C0 : xor eax, eax case 0xC933: // 33 C9 : xor ecx, ecx @@ -588,7 +624,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { // mov rax, QWORD PTR [rip + XXXXXXXX] case 0x25ff48: // 48 ff 25 XX XX XX XX : // rex.W jmp QWORD PTR [rip + XXXXXXXX] - + case 0x158D4C: // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX] // Instructions having offset relative to 'rip' need offset adjustment. if (rel_offset) *rel_offset = 3; @@ -641,6 +677,8 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { case 0x24448B: // 8B 44 24 XX : mov eax, dword ptr [esp + XX] case 0x244C8B: // 8B 4C 24 XX : mov ecx, dword ptr [esp + XX] case 0x24548B: // 8B 54 24 XX : mov edx, dword ptr [esp + XX] + case 0x245C8B: // 8B 5C 24 XX : mov ebx, dword ptr [esp + XX] + case 0x246C8B: // 8B 6C 24 XX : mov ebp, dword ptr [esp + XX] case 0x24748B: // 8B 74 24 XX : mov esi, dword ptr [esp + XX] case 0x247C8B: // 8B 7C 24 XX : mov edi, dword ptr [esp + XX] return 4; @@ -652,12 +690,20 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { } #endif - // Unknown instruction! - // FIXME: Unknown instruction failures might happen when we add a new - // interceptor or a new compiler version. In either case, they should result - // in visible and readable error messages. However, merely calling abort() - // leads to an infinite recursion in CheckFailed. - InterceptionFailed(); + // Unknown instruction! 
This might happen when we add a new interceptor, use + // a new compiler version, or if Windows changed how some functions are + // compiled. In either case, we print the address and 8 bytes of instructions + // to notify the user about the error and to help identify the unknown + // instruction. Don't treat this as a fatal error, though we can break the + // debugger if one has been attached. + u8 *bytes = (u8 *)address; + ReportError( + "interception_win: unhandled instruction at %p: %02x %02x %02x %02x %02x " + "%02x %02x %02x\n", + (void *)address, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], + bytes[5], bytes[6], bytes[7]); + if (::IsDebuggerPresent()) + __debugbreak(); return 0; } @@ -678,16 +724,24 @@ static bool CopyInstructions(uptr to, uptr from, size_t size) { while (cursor != size) { size_t rel_offset = 0; size_t instruction_size = GetInstructionSize(from + cursor, &rel_offset); - _memcpy((void*)(to + cursor), (void*)(from + cursor), + if (!instruction_size) + return false; + _memcpy((void *)(to + cursor), (void *)(from + cursor), (size_t)instruction_size); if (rel_offset) { - uptr delta = to - from; - uptr relocated_offset = *(u32*)(to + cursor + rel_offset) - delta; -#if SANITIZER_WINDOWS64 - if (relocated_offset + 0x80000000U >= 0xFFFFFFFFU) +# if SANITIZER_WINDOWS64 + // we want to make sure that the new relative offset still fits in 32-bits + // this will be untrue if relocated_offset \notin [-2**31, 2**31) + s64 delta = to - from; + s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta; + if (-0x8000'0000ll > relocated_offset || relocated_offset > 0x7FFF'FFFFll) return false; -#endif - *(u32*)(to + cursor + rel_offset) = relocated_offset; +# else + // on 32-bit, the relative offset will always be correct + s32 delta = to - from; + s32 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta; +# endif + *(s32 *)(to + cursor + rel_offset) = relocated_offset; } cursor += instruction_size; } @@ -895,6 +949,10 @@ static void 
**InterestingDLLsAvailable() { "msvcr120.dll", // VS2013 "vcruntime140.dll", // VS2015 "ucrtbase.dll", // Universal CRT +#if (defined(__MINGW32__) && defined(__i386__)) + "libc++.dll", // libc++ + "libunwind.dll", // libunwind +#endif // NTDLL should go last as it exports some functions that we should // override in the CRT [presumably only used internally]. "ntdll.dll", NULL}; diff --git a/libsanitizer/interception/interception_win.h b/libsanitizer/interception/interception_win.h index 45900130..f6eca82 100644 --- a/libsanitizer/interception/interception_win.h +++ b/libsanitizer/interception/interception_win.h @@ -41,6 +41,11 @@ bool OverrideImportedFunction(const char *module_to_patch, const char *function_name, uptr new_function, uptr *orig_old_func); +// Sets a callback to be used for reporting errors by interception_win. The +// callback will be called with printf-like arguments. Intended to be used with +// __sanitizer::Report. Pass nullptr to disable error reporting (default). +void SetErrorReportCallback(void (*callback)(const char *format, ...)); + #if !SANITIZER_WINDOWS64 // Exposed for unittests bool OverrideFunctionWithDetour( |