diff options
author | alekuz01 <aleksei.kuzmenko@arm.com> | 2025-04-15 12:59:05 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-15 12:59:05 +0100 |
commit | 38faf32d23d1edb7708711d0f9e0db853ddf75a5 (patch) | |
tree | 9efb49edd05bee489f2c800fa796e87eef6a01a3 | |
parent | 7eae1a4d1fd84e87eb31ea263d0df838cce8fa1b (diff) | |
download | llvm-38faf32d23d1edb7708711d0f9e0db853ddf75a5.zip llvm-38faf32d23d1edb7708711d0f9e0db853ddf75a5.tar.gz llvm-38faf32d23d1edb7708711d0f9e0db853ddf75a5.tar.bz2 |
[BOLT] Enable hugify for AArch64 (#117158)
Add the required hugify instrumentation and runtime library support for AArch64.
Fixes #58226
Unblocks #62695
-rw-r--r-- | bolt/CMakeLists.txt | 2 | ||||
-rw-r--r-- | bolt/lib/Rewrite/RewriteInstance.cpp | 20 | ||||
-rw-r--r-- | bolt/runtime/common.h | 6 | ||||
-rw-r--r-- | bolt/runtime/hugify.cpp | 21 | ||||
-rw-r--r-- | bolt/test/runtime/Inputs/user_func_order.txt (renamed from bolt/test/runtime/X86/Inputs/user_func_order.txt) | 0 | ||||
-rw-r--r-- | bolt/test/runtime/hugify.c (renamed from bolt/test/runtime/X86/hugify.c) | 17 | ||||
-rw-r--r-- | bolt/test/runtime/user-func-reorder.c (renamed from bolt/test/runtime/X86/user-func-reorder.c) | 22 |
7 files changed, 67 insertions, 21 deletions
diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt index f5ffa81..73c3ab5 100644 --- a/bolt/CMakeLists.txt +++ b/bolt/CMakeLists.txt @@ -136,7 +136,7 @@ if (LLVM_INCLUDE_TESTS) endif() if (BOLT_ENABLE_RUNTIME) - message(STATUS "Building BOLT runtime libraries for X86") + message(STATUS "Building BOLT runtime libraries for ${CMAKE_SYSTEM_PROCESSOR}") set(extra_args "") if(CMAKE_SYSROOT) list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT}) diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 70a175e..a03df3d 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -597,8 +597,9 @@ Error RewriteInstance::discoverStorage() { // Hugify: Additional huge page from left side due to // weird ASLR mapping addresses (4KB aligned) - if (opts::Hugify && !BC->HasFixedLoadAddress) + if (opts::Hugify && !BC->HasFixedLoadAddress) { NextAvailableAddress += BC->PageAlign; + } if (!opts::UseGnuStack && !BC->IsLinuxKernel) { // This is where the black magic happens. 
Creating PHDR table in a segment @@ -5885,17 +5886,28 @@ void RewriteInstance::rewriteFile() { // Write all allocatable sections - reloc-mode text is written here as well for (BinarySection &Section : BC->allocatableSections()) { - if (!Section.isFinalized() || !Section.getOutputData()) + if (!Section.isFinalized() || !Section.getOutputData()) { + LLVM_DEBUG(if (opts::Verbosity > 1) { + dbgs() << "BOLT-INFO: new section is finalized or !getOutputData, skip " + << Section.getName() << '\n'; + }); continue; - if (Section.isLinkOnly()) + } + if (Section.isLinkOnly()) { + LLVM_DEBUG(if (opts::Verbosity > 1) { + dbgs() << "BOLT-INFO: new section is link only, skip " + << Section.getName() << '\n'; + }); continue; + } if (opts::Verbosity >= 1) BC->outs() << "BOLT: writing new section " << Section.getName() << "\n data at 0x" << Twine::utohexstr(Section.getAllocAddress()) << "\n of size " << Section.getOutputSize() << "\n at offset " - << Section.getOutputFileOffset() << '\n'; + << Section.getOutputFileOffset() << " with content size " + << Section.getOutputContents().size() << '\n'; OS.seek(Section.getOutputFileOffset()); Section.write(OS); } diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h index 9b9965b..27d0830 100644 --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -151,10 +151,12 @@ struct timespec { uint64_t tv_nsec; /* nanoseconds */ }; -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(__arm64__) #include "sys_aarch64.h" -#else +#elif defined(__x86_64__) #include "sys_x86_64.h" +#else +#error "For AArch64/ARM64 and X86_64 only." 
#endif constexpr uint32_t BufSize = 10240; diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index a89cba2..67d5fa2 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -6,7 +6,8 @@ // //===---------------------------------------------------------------------===// -#if defined (__x86_64__) && !defined(__APPLE__) +#if defined(__x86_64__) || \ + (defined(__aarch64__) || defined(__arm64__)) && !defined(__APPLE__) #include "common.h" @@ -73,8 +74,10 @@ static bool hasPagecacheTHPSupport() { if (Res < 0) return false; - if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) + if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) { + DEBUG(report("[hugify] THP support is not enabled.\n");) return false; + } struct KernelVersionTy { uint32_t major; @@ -167,12 +170,20 @@ extern "C" void __bolt_hugify_self_impl() { /// This is hooking ELF's entry, it needs to save all machine state. extern "C" __attribute((naked)) void __bolt_hugify_self() { + // clang-format off #if defined(__x86_64__) __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL - "jmp __bolt_hugify_start_program\n" :: - :); + "jmp __bolt_hugify_start_program\n" + :::); +#elif defined(__aarch64__) || defined(__arm64__) + __asm__ __volatile__(SAVE_ALL "bl __bolt_hugify_self_impl\n" RESTORE_ALL + "adrp x16, __bolt_hugify_start_program\n" + "add x16, x16, #:lo12:__bolt_hugify_start_program\n" + "br x16\n" + :::); #else - exit(1); + __exit(1); #endif + // clang-format on } #endif diff --git a/bolt/test/runtime/X86/Inputs/user_func_order.txt b/bolt/test/runtime/Inputs/user_func_order.txt index 48b76cd..48b76cd 100644 --- a/bolt/test/runtime/X86/Inputs/user_func_order.txt +++ b/bolt/test/runtime/Inputs/user_func_order.txt diff --git a/bolt/test/runtime/X86/hugify.c b/bolt/test/runtime/hugify.c index cfc0cb6..a4a718a 100644 --- a/bolt/test/runtime/X86/hugify.c +++ b/bolt/test/runtime/hugify.c @@ -11,17 +11,28 @@ int main(int argc, char **argv) { REQUIRES: 
system-linux,bolt-runtime RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q -RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q +RUN: %clang %cflags -fpic %s -o %t.pie.exe -Wl,-q RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify +RUN: llvm-nm --numeric-sort --print-armap %t.nopie | \ +RUN: FileCheck %s -check-prefix=CHECK-NM RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE -CHECK-NOPIE: Hello world - +RUN: llvm-nm --numeric-sort --print-armap %t.pie | \ +RUN: FileCheck %s -check-prefix=CHECK-NM RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE +CHECK-NM: W __hot_start +CHECK-NM-NEXT: T _start +CHECK-NM: T main +CHECK-NM: W __hot_end +CHECK-NM: t __bolt_hugify_start_program +CHECK-NM-NEXT: W __bolt_runtime_start + +CHECK-NOPIE: Hello world + CHECK-PIE: Hello world */ diff --git a/bolt/test/runtime/X86/user-func-reorder.c b/bolt/test/runtime/user-func-reorder.c index fcb92bc..c9e12ea 100644 --- a/bolt/test/runtime/X86/user-func-reorder.c +++ b/bolt/test/runtime/user-func-reorder.c @@ -5,9 +5,7 @@ */ #include <stdio.h> -int foo(int x) { - return x + 1; -} +int foo(int x) { return x + 1; } int fib(int x) { if (x < 2) @@ -15,9 +13,7 @@ int fib(int x) { return fib(x - 1) + fib(x - 2); } -int bar(int x) { - return x - 1; -} +int bar(int x) { return x - 1; } int main(int argc, char **argv) { printf("fib(%d) = %d\n", argc, fib(argc)); @@ -31,14 +27,28 @@ RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \ RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t +RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \ +RUN: --function-order=%p/Inputs/user_func_order.txt -o %t.nohugify RUN: llvm-nm --numeric-sort --print-armap %t | \ RUN: FileCheck %s -check-prefix=CHECK-NM RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT +RUN: llvm-nm --numeric-sort --print-armap %t.nohugify | \ +RUN: FileCheck %s 
-check-prefix=CHECK-NM-NOHUGIFY +RUN: %t.nohugify 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT-NOHUGIFY + CHECK-NM: W __hot_start CHECK-NM: T main CHECK-NM-NEXT: T fib CHECK-NM-NEXT: W __hot_end +CHECK-NM: t __bolt_hugify_start_program +CHECK-NM-NEXT: W __bolt_runtime_start + +CHECK-NM-NOHUGIFY: W __hot_start +CHECK-NM-NOHUGIFY: T main +CHECK-NM-NOHUGIFY-NEXT: T fib +CHECK-NM-NOHUGIFY-NEXT: W __hot_end CHECK-OUTPUT: fib(4) = 3 +CHECK-OUTPUT-NOHUGIFY: fib(4) = 3 */ |