diff options
author | Fangrui Song <i@maskray.me> | 2023-08-29 12:26:40 -0700 |
---|---|---|
committer | Fangrui Song <i@maskray.me> | 2023-08-29 12:26:40 -0700 |
commit | 6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb (patch) | |
tree | 0412e5fad4769ad12aedfa8d7eb30dba40890b8f /llvm/test/ThinLTO | |
parent | 39191c45771564b8a0930d71b4c229147cf839db (diff) | |
download | llvm-6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb.zip llvm-6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb.tar.gz llvm-6276927bf3f6ce4a9ef0b9941b2c6450ae4cd1eb.tar.bz2 |
[ThinLTO] Mark callers of local ifunc not eligible for import
Fix https://github.com/llvm/llvm-project/issues/58740
The `target_clones` attribute results in an ifunc on eligible targets
(Linux glibc/Android or FreeBSD). If the function has internal linkage,
we will get an internal linkage ifunc.
```
__attribute__((target_clones("popcnt", "default")))
static int foo(int n) { return __builtin_popcount(n); }
int use(int n) { return foo(n); }
```
```
@foo.ifunc = internal ifunc i32 (i32), ptr @foo.resolver
define internal nonnull ptr @foo.resolver() comdat {
; local linkage comdat is another issue that should be fixed
...
select i1 %.not, ptr @foo.default.1, ptr @foo.popcnt.0
...
}
define internal i32 @foo.default.1(i32 noundef %n)
```
ifuncs are not included in module summaries, so LTO doesn't know that the
local-linkage `foo.default.1` referenced by `foo.resolver`
should be promoted. If a caller of `foo` (e.g. `use`) is imported,
the local linkage `foo.resolver` will be cloned as a definition
(IRLinker::shouldLink), leading to linker errors.
```
ld.lld: error: undefined hidden symbol: foo.default.1.llvm.8017227050314953235
>>> referenced by bar.c
>>> lto.tmp:(foo.ifunc)
```
As a simple fix, just mark `use` as not eligible for import. Non-local
linkage ifuncs do not have the problem, because they are not imported,
and not cloned when a caller is imported.
---
https://reviews.llvm.org/D82745 contains a more involved fix, though the
case from the original bug it intended to fix
(https://github.com/llvm/llvm-project/issues/45833) now works.
Note: importing ifunc is tricky.
If we import an ifunc, we need to make sure the resolver and the
implementations are in the same translation unit, as required by
https://sourceware.org/glibc/wiki/GNU_IFUNC
> Requirement (a): Resolver must be defined in the same translation unit as the implementations.
This is infeasible if the implementation is changed to
available_externally.
In addition, the imported ifunc may be referenced by two translation
units. This doesn't work with PowerPC32 -msecure-plt
(https://maskray.me/blog/2021-01-18-gnu-indirect-function).
At the very least, every referencing translation unit needs one extra
IRELATIVE dynamic relocation.
At least for the local linkage ifunc case, it doesn't have much use
outside of `target_clones`, as a global pointer is usually a better
replacement.
I think ifuncs just have too many pitfalls to justify designing more IR
features around them in order to optimize them.
Reviewed By: tejohnson
Differential Revision: https://reviews.llvm.org/D158961
Diffstat (limited to 'llvm/test/ThinLTO')
-rw-r--r-- | llvm/test/ThinLTO/X86/ifunc-import.ll | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/llvm/test/ThinLTO/X86/ifunc-import.ll b/llvm/test/ThinLTO/X86/ifunc-import.ll new file mode 100644 index 0000000..8df43e2 --- /dev/null +++ b/llvm/test/ThinLTO/X86/ifunc-import.ll @@ -0,0 +1,79 @@ +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: opt -module-summary -o a.bc a.ll +; RUN: opt -module-summary -o b.bc b.ll +; RUN: llvm-lto2 run a.bc b.bc -o t --save-temps \ +; RUN: -r a.bc,external.ifunc,pl -r a.bc,use,pl -r a.bc,use2,pl -r a.bc,__cpu_model,lx \ +; RUN: -r b.bc,main,plx -r b.bc,use,l -r b.bc,use2,l +; RUN: llvm-dis < t.1.3.import.bc | FileCheck %s --check-prefix=A +; RUN: llvm-dis < t.2.3.import.bc | FileCheck %s --check-prefix=B --implicit-check-not='@internal.resolver' + +; A: define internal nonnull ptr @internal.resolver() +; A: define internal i32 @internal.default.1(i32 %n) + +;; The ifunc implementations of internal.ifunc are internal in A, so they cannot +;; be referenced by B. Our implementation actually ensures that the ifunc resolver +;; along with its implementations are not imported. 
+; B: declare i32 @use(i32) local_unnamed_addr +; B: define available_externally i32 @use2(i32 %n) local_unnamed_addr +; B: declare i32 @external.ifunc(i32) + +;--- a.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$internal.resolver = comdat any + +@__cpu_model = external dso_local local_unnamed_addr global { i32, i32, i32, [1 x i32] } + +@internal.ifunc = internal ifunc i32 (i32), ptr @internal.resolver +@external.ifunc = ifunc i32 (i32), ptr @internal.resolver + +define dso_local i32 @use(i32 %n) local_unnamed_addr { +entry: + %call = call i32 @internal.ifunc(i32 %n) + ret i32 %call +} + +define dso_local i32 @use2(i32 %n) local_unnamed_addr { +entry: + %call = call i32 @external.ifunc(i32 %n) + ret i32 %call +} + +define internal nonnull ptr @internal.resolver() comdat { +entry: + %0 = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i64 0, i32 3, i64 0), align 4 + %1 = and i32 %0, 4 + %.not = icmp eq i32 %1, 0 + %internal.popcnt.0.internal.default.1 = select i1 %.not, ptr @internal.default.1, ptr @internal.popcnt.0 + ret ptr %internal.popcnt.0.internal.default.1 +} + +define internal i32 @internal.popcnt.0(i32 %n) { +entry: + %0 = call i32 @llvm.ctpop.i32(i32 %n) + ret i32 %0 +} + +declare i32 @llvm.ctpop.i32(i32) + +define internal i32 @internal.default.1(i32 %n) { +entry: + %0 = call i32 @llvm.ctpop.i32(i32 %n) + ret i32 %0 +} + +;--- b.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local i32 @main() local_unnamed_addr { +entry: + %0 = call i32 @use(i32 0) + %1 = call i32 @use2(i32 0) + %2 = add i32 %0, %1 + ret i32 %2 +} + +declare i32 @use(i32) local_unnamed_addr +declare i32 @use2(i32) local_unnamed_addr |