aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Drake <cygwin@jdrake.com>2025-03-21 13:29:00 -0700
committerJeremy Drake <github@jdrake.com>2025-03-31 13:50:12 -0700
commit2c5f25035d9fba128fb766a371a609b447e0873c (patch)
tree6cf3b789d86dbf7b670ca33d7ac2df945a845771
parentc23bcc8a904df50936c6f7d402c8579a273be5a6 (diff)
downloadnewlib-2c5f25035d9fba128fb766a371a609b447e0873c.zip
newlib-2c5f25035d9fba128fb766a371a609b447e0873c.tar.gz
newlib-2c5f25035d9fba128fb766a371a609b447e0873c.tar.bz2
Cygwin: add find_fast_cwd_pointer_aarch64.
This works for aarch64 hosts when the target is aarch64, x86_64, or i686, with only a small #if block in one function that needs to care. Signed-off-by: Jeremy Drake <cygwin@jdrake.com>
-rw-r--r--winsup/cygwin/Makefile.am1
-rw-r--r--winsup/cygwin/aarch64/fastcwd.cc207
-rw-r--r--winsup/cygwin/path.cc27
3 files changed, 229 insertions, 6 deletions
diff --git a/winsup/cygwin/Makefile.am b/winsup/cygwin/Makefile.am
index fdd026a..6438a41 100644
--- a/winsup/cygwin/Makefile.am
+++ b/winsup/cygwin/Makefile.am
@@ -52,6 +52,7 @@ if TARGET_X86_64
TARGET_FILES= \
x86_64/bcopy.S \
x86_64/fastcwd.cc \
+ aarch64/fastcwd.cc \
x86_64/memchr.S \
x86_64/memcpy.S \
x86_64/memmove.S \
diff --git a/winsup/cygwin/aarch64/fastcwd.cc b/winsup/cygwin/aarch64/fastcwd.cc
new file mode 100644
index 0000000..a85c539
--- /dev/null
+++ b/winsup/cygwin/aarch64/fastcwd.cc
@@ -0,0 +1,207 @@
+/* aarch64/fastcwd.cc: find the fast cwd pointer on aarch64 hosts.
+
+ This file is part of Cygwin.
+
+ This software is a copyrighted work licensed under the terms of the
+ Cygwin license. Please consult the file "CYGWIN_LICENSE" for
+ details. */
+
+/* You might well wonder why this file is included in x86_64 target files
+ in Makefile.am. It turns out that this code works when built for i686,
+ x86_64, or aarch64 with just the small #if/#elif block in
+ GetArm64ProcAddress below caring which. */
+
+#include "winsup.h"
+#include <assert.h>
+
+class fcwd_access_t;
+
+static LPCVOID
+GetArm64ProcAddress (HMODULE hModule, LPCSTR procname)
+{
+ const BYTE *proc = (const BYTE *) GetProcAddress (hModule, procname);
+#if defined (__aarch64__)
+ return proc;
+#else
+#if defined (__i386__)
+ static const BYTE thunk[] = "\x8b\xff\x55\x8b\xec\x5d\x90\xe9";
+ static const BYTE thunk2[0];
+#elif defined (__x86_64__)
+ /* see
+ https://learn.microsoft.com/en-us/windows/arm/arm64ec-abi#fast-forward-sequences */
+ static const BYTE thunk[] = "\x48\x8b\xc4\x48\x89\x58\x20\x55\x5d\xe9";
+ /* on windows 11 22000 the thunk is different than documented on that page */
+ static const BYTE thunk2[] = "\x48\x8b\xff\x55\x48\x8b\xec\x5d\x90\xe9";
+#else
+#error "Unhandled architecture for thunk detection"
+#endif
+ if (memcmp (proc, thunk, sizeof (thunk) - 1) == 0 ||
+ (sizeof(thunk2) && memcmp (proc, thunk2, sizeof (thunk2) - 1) == 0))
+ {
+ proc += sizeof (thunk) - 1;
+ proc += 4 + *(const int32_t *) proc;
+ }
+ return proc;
+#endif
+}
+
+/* these ids and masks, as well as the names of the various other parts of
+ instructions used in this file, came from
+ https://developer.arm.com/documentation/ddi0602/2024-09/Index-by-Encoding
+ (Arm A-profile A64 Instruction Set Architecture)
+*/
+#define IS_INSN(pc, name) ((*(pc) & name##_mask) == name##_id)
+static const uint32_t add_id = 0x11000000;
+static const uint32_t add_mask = 0x7fc00000;
+static const uint32_t adrp_id = 0x90000000;
+static const uint32_t adrp_mask = 0x9f000000;
+static const uint32_t b_id = 0x14000000;
+static const uint32_t b_mask = 0xfc000000;
+static const uint32_t bl_id = 0x94000000;
+static const uint32_t bl_mask = 0xfc000000;
+/* matches both cbz and cbnz */
+static const uint32_t cbz_id = 0x34000000;
+static const uint32_t cbz_mask = 0x7e000000;
+static const uint32_t ldr_id = 0xb9400000;
+static const uint32_t ldr_mask = 0xbfc00000;
+/* matches both ret and br (which are the same except ret is a 'hint' that
+ it's a subroutine return */
+static const uint32_t ret_id = 0xd61f0000;
+static const uint32_t ret_mask = 0xffbffc1f;
+
+/* this would work for either bl or b, but we only use it for bl */
+static inline LPCVOID
+extract_bl_target (const uint32_t *pc)
+{
+ assert (IS_INSN (pc, bl) || IS_INSN (pc, b));
+ int32_t offset = *pc & ~bl_mask;
+ /* sign extend */
+ if (offset & (1 << 25))
+ offset |= bl_mask;
+ /* Note uint32_t * artithmatic will implicitly multiply the offset by 4 */
+ return pc + offset;
+}
+
+static inline uint64_t
+extract_adrp_address (const uint32_t *pc)
+{
+ assert (IS_INSN (pc, adrp));
+ uint64_t adrp_base = (uint64_t) pc & ~0xFFF;
+ int64_t adrp_imm = (*pc >> (5+19+5)) & 0x3;
+ adrp_imm |= ((*pc >> 5) & 0x7FFFF) << 2;
+ /* sign extend */
+ if (adrp_imm & (1 << 20))
+ adrp_imm |= ~((1 << 21) - 1);
+ adrp_imm <<= 12;
+ return adrp_base + adrp_imm;
+}
+
+/* This function scans the code in ntdll.dll to find the address of the
+ global variable used to access the CWD. While the pointer is global,
+ it's not exported from the DLL, unfortunately. Therefore we have to
+ use some knowledge to figure out the address. */
+
+fcwd_access_t **
+find_fast_cwd_pointer_aarch64 ()
+{
+ /* Fetch entry points of relevant functions in ntdll.dll. */
+ HMODULE ntdll = GetModuleHandle ("ntdll.dll");
+ if (!ntdll)
+ return NULL;
+ LPCVOID get_dir = GetArm64ProcAddress (ntdll, "RtlGetCurrentDirectory_U");
+ LPCVOID ent_crit = GetArm64ProcAddress (ntdll, "RtlEnterCriticalSection");
+ if (!get_dir || !ent_crit)
+ return NULL;
+
+ LPCVOID use_cwd = NULL;
+ const uint32_t *start = (const uint32_t *) get_dir;
+ const uint32_t *pc = start;
+ /* find the call to RtlpReferenceCurrentDirectory, and get its address */
+ for (; pc < start + 20 && !IS_INSN (pc, ret) && !IS_INSN (pc, b); pc++)
+ {
+ if (IS_INSN (pc, bl))
+ {
+ use_cwd = extract_bl_target (pc);
+ break;
+ }
+ }
+ if (!use_cwd)
+ return NULL;
+
+ start = pc = (const uint32_t *) use_cwd;
+
+ const uint32_t *ldrpc = NULL;
+ uint32_t ldroffset, ldrsz;
+ uint32_t ldrrn, ldrrd;
+
+ /* find the ldr (immediate unsigned offset) for RtlpCurDirRef */
+ for (; pc < start + 20 && !IS_INSN (pc, ret) && !IS_INSN (pc, b); pc++)
+ {
+ if (IS_INSN (pc, ldr))
+ {
+ ldrpc = pc;
+ ldrsz = (*pc & 0x40000000);
+ ldroffset = (*pc >> (5+5)) & 0xFFF;
+ ldroffset <<= ldrsz ? 3 : 2;
+ ldrrn = (*pc >> 5) & 0x1F;
+ ldrrd = *pc & 0x1F;
+ break;
+ }
+ }
+ if (ldrpc == NULL)
+ return NULL;
+
+ /* the next instruction after the ldr should be checking if it was NULL:
+ either a compare and branch if zero or not zero (hence why cbz_mask is 7e
+ instead of 7f) */
+ if (!IS_INSN (pc + 1, cbz) || (*(pc + 1) & 0x1F) != ldrrd
+ || (*(pc + 1) & 0x80000000) != (ldrsz << 1))
+ return NULL;
+
+ /* work backwards, find a bl to RtlEnterCriticalSection whose argument
+ is the fast peb lock */
+
+ for (pc = ldrpc; pc >= start; pc--)
+ {
+ if (IS_INSN (pc, bl) && extract_bl_target (pc) == ent_crit)
+ break;
+ }
+ uint32_t addoffset;
+ uint32_t addrn;
+ for (; pc >= start; pc--)
+ {
+ if (IS_INSN (pc, add) && (*pc & 0x1F) == 0)
+ {
+ addoffset = (*pc >> (5+5)) & 0xFFF;
+ addrn = (*pc >> 5) & 0x1F;
+ break;
+ }
+ }
+ PRTL_CRITICAL_SECTION lockaddr = NULL;
+ for (; pc >= start; pc--)
+ {
+ if (IS_INSN (pc, adrp) && (*pc & 0x1F) == addrn)
+ {
+ lockaddr = (PRTL_CRITICAL_SECTION) (extract_adrp_address (pc) +
+ addoffset);
+ break;
+ }
+ }
+ if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
+ return NULL;
+
+ /* work backwards from the ldr to find the corresponding adrp */
+ fcwd_access_t **RtlpCurDirRef = NULL;
+ for (pc = ldrpc; pc >= start; pc--)
+ {
+ if (IS_INSN (pc, adrp) && (*pc & 0x1F) == ldrrn)
+ {
+ RtlpCurDirRef = (fcwd_access_t **) (extract_adrp_address (pc) +
+ ldroffset);
+ break;
+ }
+ }
+
+ return RtlpCurDirRef;
+}
+
diff --git a/winsup/cygwin/path.cc b/winsup/cygwin/path.cc
index 3a5e2ee..7a08e97 100644
--- a/winsup/cygwin/path.cc
+++ b/winsup/cygwin/path.cc
@@ -4495,21 +4495,36 @@ fcwd_access_t **
find_fast_cwd_pointer_x86_64 ();
#endif
+fcwd_access_t **
+find_fast_cwd_pointer_aarch64 ();
+
static fcwd_access_t **
find_fast_cwd ()
{
fcwd_access_t **f_cwd_ptr;
- /* First check if we're running on an ARM64 system. Skip
- fetching FAST_CWD pointer as long as there's no solution for finding
- it on that system. */
- if (wincap.host_machine () == IMAGE_FILE_MACHINE_ARM64)
- return NULL;
+ switch (wincap.host_machine ())
+ {
+ case IMAGE_FILE_MACHINE_ARM64:
+ f_cwd_ptr = find_fast_cwd_pointer_aarch64 ();
+ break;
+#ifdef __x86_64__
+ case IMAGE_FILE_MACHINE_AMD64:
+ f_cwd_ptr = find_fast_cwd_pointer_x86_64 ();
+ break;
+#endif
+ default:
+ small_printf ("Cygwin WARNING:\n"
+" Couldn't compute FAST_CWD pointer for an unknown architecture (%04y)\n"
+" Please update to the latest available Cygwin version from\n"
+" https://cygwin.com/. If the problem persists, please see\n"
+" https://cygwin.com/problems.html\n\n", (int) wincap.host_machine ());
+ return NULL;
+ }
/* Fetch the pointer but don't set the global fast_cwd_ptr yet. First
we have to make sure we know the version of the FAST_CWD structure
used on the system. */
- f_cwd_ptr = find_fast_cwd_pointer_x86_64 ();
if (!f_cwd_ptr)
small_printf ("Cygwin WARNING:\n"
" Couldn't compute FAST_CWD pointer. This typically occurs if you're using\n"