aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Drake <cygwin@jdrake.com>2025-03-18 17:49:56 -0700
committerJeremy Drake <github@jdrake.com>2025-03-31 13:50:12 -0700
commitc23bcc8a904df50936c6f7d402c8579a273be5a6 (patch)
tree8c70b1b01f2f6203de5dc0a95e7e8a13f2625344
parent85f5a86cedb78373d70e5b287a96c3a8e9b88277 (diff)
downloadnewlib-c23bcc8a904df50936c6f7d402c8579a273be5a6.zip
newlib-c23bcc8a904df50936c6f7d402c8579a273be5a6.tar.gz
newlib-c23bcc8a904df50936c6f7d402c8579a273be5a6.tar.bz2
Cygwin: use udis86 to find fast cwd pointer on x64
This makes find_fast_cwd_pointer more resilient in the face of changes to the generated code in ntdll. Signed-off-by: Jeremy Drake <cygwin@jdrake.com>
-rw-r--r--winsup/cygwin/x86_64/fastcwd.cc230
1 files changed, 151 insertions, 79 deletions
diff --git a/winsup/cygwin/x86_64/fastcwd.cc b/winsup/cygwin/x86_64/fastcwd.cc
index 6bb8c22..f5a8755 100644
--- a/winsup/cygwin/x86_64/fastcwd.cc
+++ b/winsup/cygwin/x86_64/fastcwd.cc
@@ -7,10 +7,29 @@
details. */
#include "winsup.h"
+#include <assert.h>
+#include "udis86/types.h"
+#include "udis86/extern.h"
class fcwd_access_t;
-#define peek32(x) (*(int32_t *)(x))
+/* Helper function to get the absolute address referenced by a rip-relative
+ operand: the sum of the current instruction's pc (rip), the instruction's
+ length, and the signed 32-bit displacement in the operand. Optionally, an
+ additional offset (sub_off) is subtracted to deal with the case where a
+ member of a struct is being referenced by the instruction but the address of
+ the struct is desired. The operand must be a 32-bit UD_OP_JIMM or a rip-based
+ UD_OP_MEM with no index, no scale and a 32-bit offset (asserted below). */
+static inline const void *
+rip_rel_offset (const ud_t *ud_obj, const ud_operand_t *opr, int sub_off=0)
+{
+ assert ((opr->type == UD_OP_JIMM && opr->size == 32) ||
+ (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
+ opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32));
+
+ return (const void *) (ud_insn_off (ud_obj) + ud_insn_len (ud_obj) +
+ opr->lval.sdword - sub_off);
+}
/* This function scans the code in ntdll.dll to find the address of the
global variable used to access the CWD. While the pointer is global,
@@ -30,99 +49,152 @@ find_fast_cwd_pointer_x86_64 ()
GetProcAddress (ntdll, "RtlEnterCriticalSection");
if (!get_dir || !ent_crit)
return NULL;
+ /* Initialize udis86 */
+ ud_t ud_obj;
+ ud_init (&ud_obj);
+ /* Set 64-bit mode */
+ ud_set_mode (&ud_obj, 64);
+ ud_set_input_buffer (&ud_obj, get_dir, 80);
+ /* Set pc (rip) so that subsequent calls to ud_insn_off will return the pc of
+ the instruction, saving us the hassle of tracking it ourselves */
+ ud_set_pc (&ud_obj, (uint64_t) get_dir);
+ const ud_operand_t *opr, *opr0;
+ ud_mnemonic_code_t insn;
+ ud_type_t reg = UD_NONE;
/* Search first relative call instruction in RtlGetCurrentDirectory_U. */
- const uint8_t *rcall = (const uint8_t *) memchr (get_dir, 0xe8, 80);
- if (!rcall)
+ const uint8_t *use_cwd = NULL;
+ while (ud_disassemble (&ud_obj) &&
+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
+ insn != UD_Ijmp)
+ {
+ if (insn == UD_Icall)
+ {
+ opr = ud_insn_opr (&ud_obj, 0);
+ if (opr->type == UD_OP_JIMM && opr->size == 32)
+ {
+ /* Fetch offset from instruction and compute address of called
+ function. This function actually fetches the current FAST_CWD
+ instance and performs some other actions, not important to us.
+ */
+ use_cwd = (const uint8_t *) rip_rel_offset (&ud_obj, opr);
+ break;
+ }
+ }
+ }
+ if (!use_cwd)
return NULL;
- /* Fetch offset from instruction and compute address of called function.
- This function actually fetches the current FAST_CWD instance and
- performs some other actions, not important to us. */
- const uint8_t *use_cwd = rcall + 5 + peek32 (rcall + 1);
+ ud_set_input_buffer (&ud_obj, use_cwd, 120);
+ ud_set_pc (&ud_obj, (uint64_t) use_cwd);
+
/* Next we search for the locking mechanism and perform a sanity check.
- On Pre-Windows 8 we basically look for the RtlEnterCriticalSection call.
- Windows 8 does not call RtlEnterCriticalSection. The code manipulates
- the FastPebLock manually, probably because RtlEnterCriticalSection has
- been converted to an inline function. Either way, we test if the code
- uses the FastPebLock. */
- const uint8_t *movrbx;
- const uint8_t *lock = (const uint8_t *)
- memmem ((const char *) use_cwd, 80,
- "\xf0\x0f\xba\x35", 4);
- if (lock)
+ We basically look for the RtlEnterCriticalSection call and test if the
+ code uses the FastPebLock. */
+ PRTL_CRITICAL_SECTION lockaddr = NULL;
+
+ while (ud_disassemble (&ud_obj) &&
+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
+ insn != UD_Ijmp)
{
- /* The lock instruction tweaks the LockCount member, which is not at
- the start of the PRTL_CRITICAL_SECTION structure. So we have to
- subtract the offset of LockCount to get the real address. */
- PRTL_CRITICAL_SECTION lockaddr =
- (PRTL_CRITICAL_SECTION) (lock + 9 + peek32 (lock + 4)
- - offsetof (RTL_CRITICAL_SECTION, LockCount));
- /* Test if lock address is FastPebLock. */
- if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
- return NULL;
- /* Search `mov rel(%rip),%rbx'. This is the instruction fetching the
- address of the current fcwd_access_t pointer, and it should be pretty
- near to the locking stuff. */
- movrbx = (const uint8_t *) memmem ((const char *) lock, 40,
- "\x48\x8b\x1d", 3);
+ if (insn == UD_Ilea)
+ {
+ /* udis86 seems to follow intel syntax, in that operand 0 is the
+ dest and 1 is the src */
+ opr0 = ud_insn_opr (&ud_obj, 0);
+ opr = ud_insn_opr (&ud_obj, 1);
+ if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
+ opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 &&
+ opr0->type == UD_OP_REG && opr0->size == 64)
+ {
+ lockaddr = (PRTL_CRITICAL_SECTION) rip_rel_offset (&ud_obj, opr);
+ reg = opr0->base;
+ break;
+ }
+ }
}
- else
+
+ /* Test if lock address is FastPebLock. */
+ if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
+ return NULL;
+
+ /* Find where the lock address is loaded into rcx as the first parameter of
+ a function call */
+ bool found = false;
+ if (reg != UD_R_RCX)
{
- /* Usually the callq RtlEnterCriticalSection follows right after
- fetching the lock address. */
- int call_rtl_offset = 7;
- /* Search `lea rel(%rip),%rcx'. This loads the address of the lock into
- %rcx for the subsequent RtlEnterCriticalSection call. */
- lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
- "\x48\x8d\x0d", 3);
- if (!lock)
+ while (ud_disassemble (&ud_obj) &&
+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
+ insn != UD_Ijmp)
{
- /* Windows 8.1 Preview calls `lea rel(rip),%r12' then some unrelated
- ops, then `mov %r12,%rcx', then `callq RtlEnterCriticalSection'. */
- lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
- "\x4c\x8d\x25", 3);
- call_rtl_offset = 14;
+ if (insn == UD_Imov)
+ {
+ opr0 = ud_insn_opr (&ud_obj, 0);
+ opr = ud_insn_opr (&ud_obj, 1);
+ if (opr->type == UD_OP_REG && opr->size == 64 &&
+ opr->base == reg && opr0->type == UD_OP_REG &&
+ opr0->size == 64 && opr0->base == UD_R_RCX)
+ {
+ found = true;
+ break;
+ }
+ }
}
+ if (!found)
+ return NULL;
+ }
- if (!lock)
+ /* Next is the `callq RtlEnterCriticalSection' */
+ found = false;
+ while (ud_disassemble (&ud_obj) &&
+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
+ insn != UD_Ijmp)
+ {
+ if (insn == UD_Icall)
{
- /* A recent Windows 11 Preview calls `lea rel(rip),%r13' then
- some unrelated instructions, then `callq RtlEnterCriticalSection'.
- */
- lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
- "\x4c\x8d\x2d", 3);
- call_rtl_offset = 24;
+ opr = ud_insn_opr (&ud_obj, 0);
+ if (opr->type == UD_OP_JIMM && opr->size == 32)
+ {
+ if (ent_crit != rip_rel_offset (&ud_obj, opr))
+ return NULL;
+ found = true;
+ break;
+ }
}
+ }
+ if (!found)
+ return NULL;
- if (!lock)
+ fcwd_access_t **f_cwd_ptr = NULL;
+ /* now we're looking for a mov rel(%rip), %<reg64> */
+ while (ud_disassemble (&ud_obj) &&
+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
+ insn != UD_Ijmp)
+ {
+ if (insn == UD_Imov)
{
- return NULL;
+ opr0 = ud_insn_opr (&ud_obj, 0);
+ opr = ud_insn_opr (&ud_obj, 1);
+ if (opr->type == UD_OP_MEM && opr->size == 64 &&
+ opr->base == UD_R_RIP && opr->index == UD_NONE &&
+ opr->scale == 0 && opr->offset == 32 &&
+ opr0->type == UD_OP_REG && opr0->size == 64)
+ {
+ f_cwd_ptr = (fcwd_access_t **) rip_rel_offset (&ud_obj, opr);
+ reg = opr0->base;
+ break;
+ }
}
-
- PRTL_CRITICAL_SECTION lockaddr =
- (PRTL_CRITICAL_SECTION) (lock + 7 + peek32 (lock + 3));
- /* Test if lock address is FastPebLock. */
- if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
- return NULL;
- /* Next is the `callq RtlEnterCriticalSection'. */
- lock += call_rtl_offset;
- if (lock[0] != 0xe8)
- return NULL;
- const uint8_t *call_addr = (const uint8_t *)
- (lock + 5 + peek32 (lock + 1));
- if (call_addr != ent_crit)
- return NULL;
- /* In contrast to the above Windows 8 code, we don't have to search
- for the `mov rel(%rip),%rbx' instruction. It follows right after
- the call to RtlEnterCriticalSection. */
- movrbx = lock + 5;
}
- if (!movrbx)
+ /* Check that the next instruction is a test. */
+ if (!f_cwd_ptr || !ud_disassemble (&ud_obj) ||
+ ud_insn_mnemonic (&ud_obj) != UD_Itest)
return NULL;
- /* Check that the next instruction tests if the fetched value is NULL. */
- const uint8_t *testrbx = (const uint8_t *)
- memmem (movrbx + 7, 3, "\x48\x85\xdb", 3);
- if (!testrbx)
+
+ /* ... and that it's testing the same register that the mov above loaded the
+ f_cwd_ptr into against itself */
+ opr0 = ud_insn_opr (&ud_obj, 0);
+ opr = ud_insn_opr (&ud_obj, 1);
+ if (opr->type != UD_OP_REG || opr->size != 64 || opr->base != reg ||
+ opr0->type != opr->type || opr0->size != 64 || opr0->base != opr->base)
return NULL;
- /* Compute address of the fcwd_access_t ** pointer. */
- return (fcwd_access_t **) (testrbx + peek32 (movrbx + 3));
+ return f_cwd_ptr;
}