aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/ThinLTO
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2023-08-29 12:26:40 -0700
committerFangrui Song <i@maskray.me>2023-08-29 12:26:40 -0700
commit6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb (patch)
tree0412e5fad4769ad12aedfa8d7eb30dba40890b8f /llvm/test/ThinLTO
parent39191c45771564b8a0930d71b4c229147cf839db (diff)
downloadllvm-6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb.zip
llvm-6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb.tar.gz
llvm-6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb.tar.bz2
[ThinLTO] Mark callers of local ifunc not eligible for import
Fix https://github.com/llvm/llvm-project/issues/58740 The `target_clones` attribute results in ifunc on eligible targets (Linux glibc/Android or FreeBSD). If the function has internal linkage, we will get an internal linkage ifunc. ``` __attribute__((target_clones("popcnt", "default"))) static int foo(int n) { return __builtin_popcount(n); } int use(int n) { return foo(n); } @foo.ifunc = internal ifunc i32 (i32), ptr @foo.resolver define internal nonnull ptr @foo.resolver() comdat { ; local linkage comdat is another issue that should be fixed ... select i1 %.not, ptr @foo.default.1, ptr @foo.popcnt.0 ... } define internal i32 @foo.default.1(i32 noundef %n) ``` ifuncs are not included in module summaries, so LTO doesn't know the local linkage `foo.default.1` referenced by `foo.resolver` should be promoted. If a caller of `foo` (e.g. `use`) is imported, the local linkage `foo.resolver` will be cloned as a definition (IRLinker::shouldLink), leading to linker errors. ``` ld.lld: error: undefined hidden symbol: foo.default.1.llvm.8017227050314953235 >>> referenced by bar.c >>> lto.tmp:(foo.ifunc) ``` As a simple fix, just mark `use` as not eligible for import. Non-local linkage ifuncs do not have the problem, because they are not imported, and not cloned when a caller is imported. --- https://reviews.llvm.org/D82745 contains a more involved fix, though the original bug it intended to fix (https://github.com/llvm/llvm-project/issues/45833) now works. Note: importing ifunc is tricky. If we import an ifunc, we need to make sure the resolver and the implementation are in the translation unit, as required by https://sourceware.org/glibc/wiki/GNU_IFUNC > Requirement (a): Resolver must be defined in the same translation unit as the implementations. This is infeasible if the implementation is changed to available_externally. In addition, the imported ifunc may be referenced by two translation units. This doesn't work with PowerPC32 -msecure-plt (https://maskray.me/blog/2021-01-18-gnu-indirect-function). At the very least, every referencing translation unit needs one extra IRELATIVE dynamic relocation. At least for the local linkage ifunc case, it doesn't have much use outside of `target_clones`, as a global pointer is usually a better replacement. I think ifuncs just have too many pitfalls to design more IR features around it to optimize them. Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D158961
Diffstat (limited to 'llvm/test/ThinLTO')
-rw-r--r--llvm/test/ThinLTO/X86/ifunc-import.ll79
1 files changed, 79 insertions, 0 deletions
diff --git a/llvm/test/ThinLTO/X86/ifunc-import.ll b/llvm/test/ThinLTO/X86/ifunc-import.ll
new file mode 100644
index 0000000..8df43e2
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/ifunc-import.ll
@@ -0,0 +1,79 @@
+; RUN: rm -rf %t && split-file %s %t && cd %t
+; RUN: opt -module-summary -o a.bc a.ll
+; RUN: opt -module-summary -o b.bc b.ll
+; RUN: llvm-lto2 run a.bc b.bc -o t --save-temps \
+; RUN: -r a.bc,external.ifunc,pl -r a.bc,use,pl -r a.bc,use2,pl -r a.bc,__cpu_model,lx \
+; RUN: -r b.bc,main,plx -r b.bc,use,l -r b.bc,use2,l
+; RUN: llvm-dis < t.1.3.import.bc | FileCheck %s --check-prefix=A
+; RUN: llvm-dis < t.2.3.import.bc | FileCheck %s --check-prefix=B --implicit-check-not='@internal.resolver'
+
+; A: define internal nonnull ptr @internal.resolver()
+; A: define internal i32 @internal.default.1(i32 %n)
+
+;; The ifunc implementations of internal.ifunc are internal in A, so they cannot
+;; be referenced by B. Our implementation actually ensures that the ifunc resolver
+;; along with its implementations are not imported.
+; B: declare i32 @use(i32) local_unnamed_addr
+; B: define available_externally i32 @use2(i32 %n) local_unnamed_addr
+; B: declare i32 @external.ifunc(i32)
+
+;--- a.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$internal.resolver = comdat any
+
+@__cpu_model = external dso_local local_unnamed_addr global { i32, i32, i32, [1 x i32] }
+
+@internal.ifunc = internal ifunc i32 (i32), ptr @internal.resolver
+@external.ifunc = ifunc i32 (i32), ptr @internal.resolver
+
+define dso_local i32 @use(i32 %n) local_unnamed_addr {
+entry:
+ %call = call i32 @internal.ifunc(i32 %n)
+ ret i32 %call
+}
+
+define dso_local i32 @use2(i32 %n) local_unnamed_addr {
+entry:
+ %call = call i32 @external.ifunc(i32 %n)
+ ret i32 %call
+}
+
+define internal nonnull ptr @internal.resolver() comdat {
+entry:
+ %0 = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i64 0, i32 3, i64 0), align 4
+ %1 = and i32 %0, 4
+ %.not = icmp eq i32 %1, 0
+ %internal.popcnt.0.internal.default.1 = select i1 %.not, ptr @internal.default.1, ptr @internal.popcnt.0
+ ret ptr %internal.popcnt.0.internal.default.1
+}
+
+define internal i32 @internal.popcnt.0(i32 %n) {
+entry:
+ %0 = call i32 @llvm.ctpop.i32(i32 %n)
+ ret i32 %0
+}
+
+declare i32 @llvm.ctpop.i32(i32)
+
+define internal i32 @internal.default.1(i32 %n) {
+entry:
+ %0 = call i32 @llvm.ctpop.i32(i32 %n)
+ ret i32 %0
+}
+
+;--- b.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local i32 @main() local_unnamed_addr {
+entry:
+ %0 = call i32 @use(i32 0)
+ %1 = call i32 @use2(i32 0)
+ %2 = add i32 %0, %1
+ ret i32 %2
+}
+
+declare i32 @use(i32) local_unnamed_addr
+declare i32 @use2(i32) local_unnamed_addr