diff options
author | Fangrui Song <i@maskray.me> | 2021-03-01 13:43:23 -0800 |
---|---|---|
committer | Fangrui Song <i@maskray.me> | 2021-03-01 13:43:23 -0800 |
commit | 04c3040f417683e7c31b3ee3381a3263106f48c5 (patch) | |
tree | d55a29f805894517702e45b07b00ae6d9c4cd6a1 | |
parent | 5a9c34918bb1526b7e8c29aa5e4fb8d8e27e27b4 (diff) | |
download | llvm-04c3040f417683e7c31b3ee3381a3263106f48c5.zip llvm-04c3040f417683e7c31b3ee3381a3263106f48c5.tar.gz llvm-04c3040f417683e7c31b3ee3381a3263106f48c5.tar.bz2 |
[InstrProfiling] Place __llvm_prf_vnodes and __llvm_prf_names in llvm.used on ELF
`__llvm_prf_vnodes` and `__llvm_prf_names` are used by the runtime but are not
referenced via relocation in the translation unit.
With `-z start-stop-gc` (D96914 https://sourceware.org/bugzilla/show_bug.cgi?id=27451),
the linker no longer lets `__start_/__stop_` references retain them.
Place `__llvm_prf_vnodes` and `__llvm_prf_names` in `llvm.used` to make
them retained by the linker.
This patch changes most existing `UsedVars` cases to `CompilerUsedVars`
to reflect the ideal state: if the binary format properly supports
section-based GC (dead stripping), `llvm.compiler.used` should be sufficient.
`__llvm_prf_vnodes` and `__llvm_prf_names` are switched to `UsedVars`
since we want them to be unconditionally retained by both compiler and linker.
Behavior on the other binary formats (COFF and Mach-O) is not affected.
Differential Revision: https://reviews.llvm.org/D97649
6 files changed, 123 insertions, 75 deletions
diff --git a/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c new file mode 100644 index 0000000..a23bc48 --- /dev/null +++ b/compiler-rt/test/profile/Linux/Inputs/instrprof-value-merge.c @@ -0,0 +1,70 @@ +#include <string.h> + +void (*f0)(); +void (*f1)(); +void (*f2)(); + +char dst[200]; +char src[200]; +volatile int n; + +__attribute__((noinline)) void foo() {} + +__attribute__((noinline)) void bar() { + f0 = foo; + f1 = foo; + f2 = foo; + n = 4; +} +int main(int argc, char *argv[]) { + int i; + bar(); + if (argc == 1) { + f0(); + for (i = 0; i < 9; i++) + f1(); + for (i = 0; i < 99; i++) + f2(); + } else { + memcpy((void *)dst, (void *)src, n); + for (i = 0; i < 6; i++) + memcpy((void *)(dst + 2), (void *)src, n + 1); + for (i = 0; i < 66; i++) + memcpy((void *)(dst + 9), (void *)src, n + 2); + } +} + +// CHECK: Counters: +// CHECK: main: +// CHECK: Hash: 0x0a9bd81e87ab6e87 +// CHECK: Counters: 6 +// CHECK: Indirect Call Site Count: 3 +// CHECK: Number of Memory Intrinsics Calls: 3 +// CHECK: Block counts: [27, 297, 12, 132, 3, 2] +// CHECK: Indirect Target Results: +// CHECK: [ 0, foo, 3 ] +// CHECK: [ 1, foo, 27 ] +// CHECK: [ 2, foo, 297 ] +// CHECK: Memory Intrinsic Size Results: +// CHECK: [ 0, 4, 2 ] +// CHECK: [ 1, 5, 12 ] +// CHECK: [ 2, 6, 132 ] +// CHECK: Instrumentation level: IR +// CHECK: Functions shown: 1 +// CHECK: Total functions: 3 +// CHECK: Maximum function count: 327 +// CHECK: Maximum internal block count: 297 +// CHECK: Statistics for indirect call sites profile: +// CHECK: Total number of sites: 3 +// CHECK: Total number of sites with values: 3 +// CHECK: Total number of profiled values: 3 +// CHECK: Value sites histogram: +// CHECK: NumTargets, SiteCount +// CHECK: 1, 3 +// CHECK: Statistics for memory intrinsic calls sizes profile: +// CHECK: Total number of sites: 3 +// CHECK: Total number of sites with values: 3 +// CHECK: Total number of profiled values: 3 
+// CHECK: Value sites histogram: +// CHECK: NumTargets, SiteCount +// CHECK: 1, 3 diff --git a/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c new file mode 100644 index 0000000..e0079c0 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-value-merge-lld.c @@ -0,0 +1,10 @@ +// REQUIRES: lld-available + +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=lld -ffunction-sections -fdata-sections -Wl,--gc-sections -z start-stop-gc +// RUN: rm -rf %t.profdir +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c diff --git a/compiler-rt/test/profile/Linux/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/instrprof-value-merge.c index 2619a1d0..45eed47 100644 --- a/compiler-rt/test/profile/Linux/instrprof-value-merge.c +++ b/compiler-rt/test/profile/Linux/instrprof-value-merge.c @@ -1,79 +1,27 @@ -// RUN: %clang_pgogen -o %t -O3 %s +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c // RUN: rm -rf %t.profdir // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 // RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 -// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s +// RUN: llvm-profdata show -counts 
-function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c -#include <string.h> - -void (*f0)(); -void (*f1)(); -void (*f2)(); - -char dst[200]; -char src[200]; -volatile int n; - -__attribute__((noinline)) void foo() {} - -__attribute__((noinline)) void bar() { - f0 = foo; - f1 = foo; - f2 = foo; - n = 4; -} -int main(int argc, char *argv[]) { - int i; - bar(); - if (argc == 1) { - f0(); - for (i = 0; i < 9; i++) - f1(); - for (i = 0; i < 99; i++) - f2(); - } else { - memcpy((void *)dst, (void *)src, n); - for (i = 0; i < 6; i++) - memcpy((void *)(dst + 2), (void *)src, n + 1); - for (i = 0; i < 66; i++) - memcpy((void *)(dst + 9), (void *)src, n + 2); - } -} +/// -z start-stop-gc requires binutils 2.37. +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=bfd -ffunction-sections -fdata-sections -Wl,--gc-sections +// RUN: rm -rf %t.profdir +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c -// CHECK: Counters: -// CHECK: main: -// CHECK: Hash: 0x0a9bd81e87ab6e87 -// CHECK: Counters: 6 -// CHECK: Indirect Call Site Count: 3 -// CHECK: Number of Memory Intrinsics Calls: 3 -// CHECK: Block counts: [27, 297, 12, 132, 3, 2] -// CHECK: Indirect Target Results: -// CHECK: [ 0, foo, 3 ] -// CHECK: [ 1, foo, 27 ] -// CHECK: [ 2, foo, 297 ] -// CHECK: Memory Intrinsic Size Results: -// CHECK: [ 0, 4, 2 ] -// CHECK: [ 1, 5, 12 ] -// CHECK: [ 2, 6, 132 ] -// CHECK: Instrumentation level: IR -// CHECK: Functions shown: 1 -// CHECK: Total functions: 3 -// 
CHECK: Maximum function count: 327 -// CHECK: Maximum internal block count: 297 -// CHECK: Statistics for indirect call sites profile: -// CHECK: Total number of sites: 3 -// CHECK: Total number of sites with values: 3 -// CHECK: Total number of profiled values: 3 -// CHECK: Value sites histogram: -// CHECK: NumTargets, SiteCount -// CHECK: 1, 3 -// CHECK: Statistics for memory intrinsic calls sizes profile: -// CHECK: Total number of sites: 3 -// CHECK: Total number of sites with values: 3 -// CHECK: Total number of profiled values: 3 -// CHECK: Value sites histogram: -// CHECK: NumTargets, SiteCount -// CHECK: 1, 3 +// RUN: %clang_pgogen -o %t -O3 %S/Inputs/instrprof-value-merge.c -fuse-ld=gold -ffunction-sections -fdata-sections -Wl,--gc-sections +// RUN: rm -rf %t.profdir +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %S/Inputs/instrprof-value-merge.c diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 5242211..94b156f 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -57,6 +57,7 @@ private: } }; DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap; + std::vector<GlobalValue *> CompilerUsedVars; std::vector<GlobalValue *> UsedVars; std::vector<GlobalVariable *> ReferencedNames; GlobalVariable *NamesVar; diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index d73bb66..a17d6f5 
100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -539,6 +539,7 @@ bool InstrProfiling::run( NamesVar = nullptr; NamesSize = 0; ProfileDataMap.clear(); + CompilerUsedVars.clear(); UsedVars.clear(); TT = Triple(M.getTargetTriple()); @@ -921,7 +922,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { ProfileDataMap[NamePtr] = PD; // Mark the data variable as used so that it isn't stripped out. - UsedVars.push_back(Data); + CompilerUsedVars.push_back(Data); // Now that the linkage set by the FE has been passed to the data and counter // variables, reset Name variable's linkage and visibility to private so that // it can be removed later by the compiler. @@ -976,6 +977,8 @@ void InstrProfiling::emitVNodes() { Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); VNodesVar->setSection( getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat())); + // VNodesVar is used by runtime but not referenced via relocation by other + // sections. Conservatively make it linker retained. UsedVars.push_back(VNodesVar); } @@ -1004,6 +1007,8 @@ void InstrProfiling::emitNameData() { // linker from inserting padding before the start of the names section or // between names entries. NamesVar->setAlignment(Align(1)); + // NamesVar is used by runtime but not referenced via relocation by other + // sections. Conservatively make it linker retained. 
UsedVars.push_back(NamesVar); for (auto *NamePtr : ReferencedNames) @@ -1031,6 +1036,9 @@ void InstrProfiling::emitRegistration() { getInstrProfRegFuncName(), M); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); + for (Value *Data : CompilerUsedVars) + if (Data != NamesVar && !isa<Function>(Data)) + IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); for (Value *Data : UsedVars) if (Data != NamesVar && !isa<Function>(Data)) IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); @@ -1081,7 +1089,7 @@ bool InstrProfiling::emitRuntimeHook() { IRB.CreateRet(Load); // Mark the user variable as used so that it isn't stripped out. - UsedVars.push_back(User); + CompilerUsedVars.push_back(User); return true; } @@ -1094,9 +1102,14 @@ void InstrProfiling::emitUses() { // or discarded as a unit, so llvm.compiler.used is sufficient. Otherwise, // conservatively make all of them retained by the linker. if (TT.isOSBinFormatELF()) - appendToCompilerUsed(*M, UsedVars); + appendToCompilerUsed(*M, CompilerUsedVars); else - appendToUsed(*M, UsedVars); + appendToUsed(*M, CompilerUsedVars); + + // We do not add proper references from used metadata sections to NamesVar and + // VNodesVar, so we have to be conservative and place them in llvm.used + // regardless of the target, + appendToUsed(*M, UsedVars); } void InstrProfiling::emitInitialization() { diff --git a/llvm/test/Instrumentation/InstrProfiling/icall.ll b/llvm/test/Instrumentation/InstrProfiling/icall.ll index 311770a..bc7d6c9 100644 --- a/llvm/test/Instrumentation/InstrProfiling/icall.ll +++ b/llvm/test/Instrumentation/InstrProfiling/icall.ll @@ -50,6 +50,12 @@ attributes #0 = { nounwind } ; DYN-NOT: @__profvp_foo ; DYN-NOT: @__llvm_prf_vnodes +;; __llvm_prf_vnodes and __llvm_prf_nm are not referenced by other metadata sections. +;; We have to conservatively place them in llvm.used. 
+; STATIC: @llvm.used = appending global +; STATIC-SAME: @__llvm_prf_vnodes +; STATIC-SAME: @__llvm_prf_nm + ; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0) ; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0) ; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0) |