aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2024-07-02 10:58:24 -0700
committerGitHub <noreply@github.com>2024-07-02 10:58:24 -0700
commitfdd319655359b005889abf40d1d8a54fbd56059e (patch)
tree38f2d1e6f6c0a5de6df90efcbbbf28c2f91b42e7
parente852725e5d517195de247f30b62ad2c56717958a (diff)
downloadllvm-fdd319655359b005889abf40d1d8a54fbd56059e.zip
llvm-fdd319655359b005889abf40d1d8a54fbd56059e.tar.gz
llvm-fdd319655359b005889abf40d1d8a54fbd56059e.tar.bz2
[ELF] Make start/stop symbols retain associated discardable output sections
An empty output section specified in the `SECTIONS` command (e.g. `empty : { *(empty) }`) may be discarded. Due to phase ordering, we might define `__start_empty`/`__stop_empty` symbols with incorrect section indexes (usually benign, but could go out of bounds and cause `readelf -s` to print `BAD`). ``` finalizeSections addStartStopSymbols // __start_empty is defined // __start_empty is added to .symtab sortSections adjustOutputSections // `empty` is discarded writeSections // __start_empty is Defined with an invalid section index ``` Loaders use `st_value` members of the start/stop symbols and expect no "undefined symbol" linker error, but do not particularly care whether the symbols are defined or undefined. Let's retain the associated empty output section so that start/stop symbols will have correct section indexes. The approach allows us to remove `LinkerScript::isDiscarded` (https://reviews.llvm.org/D114179). Also delete the `findSection(".text")` special case from https://reviews.llvm.org/D46200, which is unnecessary even before this patch (`elfHeader` would be fine even with very large executables). Note: we should be careful not to unnecessarily retain .ARM.exidx, which would create an empty PT_ARM_EXIDX. ~40 tests would need to be updated. --- An alternative is to discard the empty output section and keep the start/stop symbols undefined. This approach needs more code and requires `LinkerScript::isDiscarded` before we discard empty sections in ``adjustOutputSections`. Pull Request: https://github.com/llvm/llvm-project/pull/96343
-rw-r--r--lld/ELF/LinkerScript.cpp5
-rw-r--r--lld/ELF/LinkerScript.h2
-rw-r--r--lld/ELF/Writer.cpp50
-rw-r--r--lld/test/ELF/linkerscript/empty-preinit-array-start-stop.test (renamed from lld/test/ELF/linkerscript/preinit-array-empty.test)9
-rw-r--r--lld/test/ELF/linkerscript/empty-section-start-stop.test36
-rw-r--r--lld/test/ELF/linkerscript/sections-gc2.s1
-rw-r--r--lld/test/ELF/pre_init_fini_array_missing.s29
7 files changed, 78 insertions, 54 deletions
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 1ec796a..e2208da 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -1184,11 +1184,6 @@ static bool isDiscardable(const OutputSection &sec) {
return true;
}
-bool LinkerScript::isDiscarded(const OutputSection *sec) const {
- return hasSectionsCommand && (getFirstInputSection(sec) == nullptr) &&
- isDiscardable(*sec);
-}
-
static void maybePropagatePhdrs(OutputSection &sec,
SmallVector<StringRef, 0> &phdrs) {
if (sec.phdrs.empty()) {
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index db26d02..43d0850e 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -342,8 +342,6 @@ public:
void processSymbolAssignments();
void declareSymbols();
- bool isDiscarded(const OutputSection *sec) const;
-
// Used to handle INSERT AFTER statements.
void processInsertCommands();
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index ba79431..bce3cd2 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -2064,33 +2064,21 @@ template <class ELFT> void Writer<ELFT>::checkExecuteOnly() {
// The linker is expected to define SECNAME_start and SECNAME_end
// symbols for a few sections. This function defines them.
template <class ELFT> void Writer<ELFT>::addStartEndSymbols() {
- // If a section does not exist, there's ambiguity as to how we
- // define _start and _end symbols for an init/fini section. Since
- // the loader assume that the symbols are always defined, we need to
- // always define them. But what value? The loader iterates over all
- // pointers between _start and _end to run global ctors/dtors, so if
- // the section is empty, their symbol values don't actually matter
- // as long as _start and _end point to the same location.
- //
- // That said, we don't want to set the symbols to 0 (which is
- // probably the simplest value) because that could cause some
- // program to fail to link due to relocation overflow, if their
- // program text is above 2 GiB. We use the address of the .text
- // section instead to prevent that failure.
- //
- // In rare situations, the .text section may not exist. If that's the
- // case, use the image base address as a last resort.
- OutputSection *Default = findSection(".text");
- if (!Default)
- Default = Out::elfHeader;
-
+ // If the associated output section does not exist, there is ambiguity as to
+ // how we define _start and _end symbols for an init/fini section. Users
+ // expect no "undefined symbol" linker errors and loaders expect equal
+ // st_value but do not particularly care whether the symbols are defined or
+ // not. We retain the output section so that the section indexes will be
+ // correct.
auto define = [=](StringRef start, StringRef end, OutputSection *os) {
- if (os && !script->isDiscarded(os)) {
- addOptionalRegular(start, os, 0);
- addOptionalRegular(end, os, -1);
+ if (os) {
+ Defined *startSym = addOptionalRegular(start, os, 0);
+ Defined *stopSym = addOptionalRegular(end, os, -1);
+ if (startSym || stopSym)
+ os->usedInExpression = true;
} else {
- addOptionalRegular(start, Default, 0);
- addOptionalRegular(end, Default, 0);
+ addOptionalRegular(start, Out::elfHeader, 0);
+ addOptionalRegular(end, Out::elfHeader, 0);
}
};
@@ -2098,6 +2086,8 @@ template <class ELFT> void Writer<ELFT>::addStartEndSymbols() {
define("__init_array_start", "__init_array_end", Out::initArray);
define("__fini_array_start", "__fini_array_end", Out::finiArray);
+ // As a special case, don't unnecessarily retain .ARM.exidx, which would
+ // create an empty PT_ARM_EXIDX.
if (OutputSection *sec = findSection(".ARM.exidx"))
define("__exidx_start", "__exidx_end", sec);
}
@@ -2112,10 +2102,12 @@ void Writer<ELFT>::addStartStopSymbols(OutputSection &osec) {
StringRef s = osec.name;
if (!isValidCIdentifier(s))
return;
- addOptionalRegular(saver().save("__start_" + s), &osec, 0,
- config->zStartStopVisibility);
- addOptionalRegular(saver().save("__stop_" + s), &osec, -1,
- config->zStartStopVisibility);
+ Defined *startSym = addOptionalRegular(saver().save("__start_" + s), &osec, 0,
+ config->zStartStopVisibility);
+ Defined *stopSym = addOptionalRegular(saver().save("__stop_" + s), &osec, -1,
+ config->zStartStopVisibility);
+ if (startSym || stopSym)
+ osec.usedInExpression = true;
}
static bool needsPtLoad(OutputSection *sec) {
diff --git a/lld/test/ELF/linkerscript/preinit-array-empty.test b/lld/test/ELF/linkerscript/empty-preinit-array-start-stop.test
index 696c8dd..0f6d2e4 100644
--- a/lld/test/ELF/linkerscript/preinit-array-empty.test
+++ b/lld/test/ELF/linkerscript/empty-preinit-array-start-stop.test
@@ -3,7 +3,8 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/t.s -o %t.o
## PR52534: https://bugs.llvm.org/show_bug.cgi?id=52534
-## Check case where .preinit_array is discarded.
+## Check case where .preinit_array would be discarded in the absence of the
+## start/stop symbols.
## Link should succeed without causing an out of range relocation error.
# RUN: ld.lld -T %t/discarded.script %t.o -o %t1 --image-base=0x80000000
# RUN: llvm-readelf -s %t1 | FileCheck --check-prefixes=CHECK,DISCARDED %s
@@ -15,7 +16,7 @@
# CHECK: [[#%x,ADDR:]] 0 NOTYPE LOCAL HIDDEN [[#]] __preinit_array_start
# CHECK-NEXT: [[#ADDR]] 0 NOTYPE LOCAL HIDDEN [[#]] __preinit_array_end
-# DISCARDED-NEXT: [[#ADDR]] 0 NOTYPE GLOBAL DEFAULT [[#]] _start
+# DISCARDED-NEXT: {{0*}}[[#ADDR-14]] 0 NOTYPE GLOBAL DEFAULT [[#]] _start
# EMPTY-NOT: [[#ADDR]] 0 NOTYPE GLOBAL DEFAULT [[#]] _start
# EMPTY: [[#ADDR]] 0 NOTYPE GLOBAL DEFAULT [[#]] ADDR
@@ -26,8 +27,12 @@ _start:
movq __preinit_array_start@GOTPCREL(%rip),%rax
movq __preinit_array_end@GOTPCREL(%rip),%rax
+.section .rodata,"a"
+.byte 0
+
#--- discarded.script
SECTIONS {
+ .rodata : { *(.rodata); }
.text : { *(.text); }
.preinit_array : { *(.preinit_array); }
}
diff --git a/lld/test/ELF/linkerscript/empty-section-start-stop.test b/lld/test/ELF/linkerscript/empty-section-start-stop.test
new file mode 100644
index 0000000..a32a936
--- /dev/null
+++ b/lld/test/ELF/linkerscript/empty-section-start-stop.test
@@ -0,0 +1,36 @@
+# REQUIRES: x86
+## __start/__stop symbols retain the associated empty sections with C identifier names.
+
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/test.s -o %t.o
+# RUN: ld.lld -T %t/ldscript -o %t.out %t.o -z start-stop-visibility=default
+# RUN: llvm-objdump -h -t %t.out | FileCheck %s
+
+# CHECK: .text
+# CHECK-NEXT: empty1
+# CHECK-NEXT: empty2
+# CHECK-NEXT: empty3
+
+# CHECK: [[#%x,ADDR:]] l empty1 0000000000000000 .hidden __start_empty1
+# CHECK-NEXT: {{0*}}[[#ADDR]] g empty2 0000000000000000 .protected __stop_empty2
+# CHECK-NEXT: {{0*}}[[#ADDR]] g empty3 0000000000000000 __stop_empty3
+
+#--- ldscript
+SECTIONS {
+ .text : { *(.text .text.*) }
+ empty0 : { *(empty0) }
+ empty1 : { *(empty1) }
+ empty2 : { *(empty2) }
+ empty3 : { *(empty3) }
+}
+
+#--- test.s
+.weak __start_empty1, __stop_empty2, __stop_empty3
+.hidden __start_empty1
+.protected __stop_empty2
+
+.globl _start
+_start:
+ movq __start_empty1@GOTPCREL(%rip),%rax
+ movq __stop_empty2@GOTPCREL(%rip),%rax
+ movq __stop_empty3@GOTPCREL(%rip),%rax
diff --git a/lld/test/ELF/linkerscript/sections-gc2.s b/lld/test/ELF/linkerscript/sections-gc2.s
index 76be65f..c2611a7 100644
--- a/lld/test/ELF/linkerscript/sections-gc2.s
+++ b/lld/test/ELF/linkerscript/sections-gc2.s
@@ -11,6 +11,7 @@
# CHECK: Idx Name Size VMA Type
# CHECK-NEXT: 0
# CHECK-NEXT: used_in_reloc
+# CHECK-NEXT: used_in_script
# CHECK-NEXT: .text
# CHECK-NEXT: .comment
# CHECK-NEXT: .symtab
diff --git a/lld/test/ELF/pre_init_fini_array_missing.s b/lld/test/ELF/pre_init_fini_array_missing.s
index 22cf5fe..a1c2e5d 100644
--- a/lld/test/ELF/pre_init_fini_array_missing.s
+++ b/lld/test/ELF/pre_init_fini_array_missing.s
@@ -14,29 +14,26 @@ _start:
call __fini_array_start
call __fini_array_end
-// With no .init_array section the symbols resolve to .text.
-// 0x201120 - (0x201120 + 5) = -5
-// 0x201120 - (0x201125 + 5) = -10
-// ...
+/// Due to __init_array_start/__init_array_end, .init_array is retained.
// CHECK: Disassembly of section .text:
// CHECK-EMPTY:
// CHECK-NEXT: <_start>:
-// CHECK-NEXT: 201120: callq 0x201120
-// CHECK-NEXT: callq 0x201120
-// CHECK-NEXT: callq 0x201120
-// CHECK-NEXT: callq 0x201120
-// CHECK-NEXT: callq 0x201120
-// CHECK-NEXT: callq 0x201120
+// CHECK-NEXT: 201120: callq 0x200000
+// CHECK-NEXT: callq 0x200000
+// CHECK-NEXT: callq 0x200000
+// CHECK-NEXT: callq 0x200000
+// CHECK-NEXT: callq 0x200000
+// CHECK-NEXT: callq 0x200000
// In position-independent binaries, they resolve to .text too.
// PIE: Disassembly of section .text:
// PIE-EMPTY:
// PIE-NEXT: <_start>:
-// PIE-NEXT: 1210: callq 0x1210
-// PIE-NEXT: callq 0x1210
-// PIE-NEXT: callq 0x1210
-// PIE-NEXT: callq 0x1210
-// PIE-NEXT: callq 0x1210
-// PIE-NEXT: callq 0x1210
+// PIE-NEXT: 1210: callq 0x0
+// PIE-NEXT: callq 0x0
+// PIE-NEXT: callq 0x0
+// PIE-NEXT: callq 0x0
+// PIE-NEXT: callq 0x0
+// PIE-NEXT: callq 0x0