diff options
author | Teresa Johnson <tejohnson@google.com> | 2023-05-04 20:52:48 -0700 |
---|---|---|
committer | Teresa Johnson <tejohnson@google.com> | 2023-05-05 07:06:41 -0700 |
commit | 1a3947df8545674d937deeba4aaaf08ea39e7153 (patch) | |
tree | 77c8e3a22a17a5db6642fddd799b52f757632916 /llvm/test/ThinLTO | |
parent | b18161d7850c5102e9882649278213e226bed610 (diff) | |
download | llvm-1a3947df8545674d937deeba4aaaf08ea39e7153.zip llvm-1a3947df8545674d937deeba4aaaf08ea39e7153.tar.gz llvm-1a3947df8545674d937deeba4aaaf08ea39e7153.tar.bz2 |
[MemProf] Use MapVector to avoid non-determinism
Multiple cases of instability in the cloning behavior occurred due to
iteration of maps indexed by pointers. Fix by changing the maps to
MapVector. This necessitated adding DenseMapInfo specializations for the
structure types used in the keys.
These were found while trying to commit patch 3 of the cloning
(bfe7205975a63a605ff3faacd97fe4c1bf4c19b3). The second one, however, turned
out to be in code already committed in patch 2 and was merely exposed by a
new test added with patch 3: specifically, the iteration in identifyClones().
Added the portion of the new test cases from patch 3 that relies only on
the already committed changes and exposes the issue.
Differential Revision: https://reviews.llvm.org/D149924
Diffstat (limited to 'llvm/test/ThinLTO')
-rw-r--r-- | llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll b/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll new file mode 100644 index 0000000..38bfe9d --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll @@ -0,0 +1,223 @@ +;; Test context disambiguation for a callgraph containing multiple memprof +;; contexts and no inlining, where we need to perform additional cloning +;; during function assignment/cloning to handle the combination of contexts +;; to 2 different allocations. +;; +;; void E(char **buf1, char **buf2) { +;; *buf1 = new char[10]; +;; *buf2 = new char[10]; +;; } +;; +;; void B(char **buf1, char **buf2) { +;; E(buf1, buf2); +;; } +;; +;; void C(char **buf1, char **buf2) { +;; E(buf1, buf2); +;; } +;; +;; void D(char **buf1, char **buf2) { +;; E(buf1, buf2); +;; } +;; int main(int argc, char **argv) { +;; char *cold1, *cold2, *default1, *default2, *default3, *default4; +;; B(&default1, &default2); +;; C(&default3, &cold1); +;; D(&cold2, &default4); +;; memset(cold1, 0, 10); +;; memset(cold2, 0, 10); +;; memset(default1, 0, 10); +;; memset(default2, 0, 10); +;; memset(default3, 0, 10); +;; memset(default4, 0, 10); +;; delete[] default1; +;; delete[] default2; +;; delete[] default3; +;; delete[] default4; +;; sleep(10); +;; delete[] cold1; +;; delete[] cold2; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
+ + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + + +;; Try again but with distributed ThinLTO +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP + + +source_filename = "funcassigncloning.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline optnone +define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) { +entry: + %call = call ptr @_Znam(i64 noundef 10), !memprof !0, !callsite !7 + %call1 = call ptr @_Znam(i64 noundef 10), !memprof !8, !callsite !15 + ret void +} + +declare ptr @_Znam(i64) + +define internal void @_Z1BPPcS0_() { +entry: + call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !16 + ret void +} + +define internal void @_Z1CPPcS0_() { +entry: + call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !17 + ret void +} + +define internal void @_Z1DPPcS0_() { +entry: + call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !18 + ret void +} + +; Function Attrs: noinline optnone +define i32 @main() { +entry: + call void @_Z1BPPcS0_() + call void @_Z1CPPcS0_() + call void @_Z1DPPcS0_() + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +; uselistorder directives +uselistorder ptr @_Znam, { 1, 0 } + +!0 = !{!1, !3, !5} +!1 = !{!2, !"cold"} +!2 = !{i64 -3461278137325233666, i64 -7799663586031895603} +!3 = !{!4, !"notcold"} +!4 = !{i64 -3461278137325233666, i64 
-3483158674395044949} +!5 = !{!6, !"notcold"} +!6 = !{i64 -3461278137325233666, i64 -2441057035866683071} +!7 = !{i64 -3461278137325233666} +!8 = !{!9, !11, !13} +!9 = !{!10, !"notcold"} +!10 = !{i64 -1415475215210681400, i64 -2441057035866683071} +!11 = !{!12, !"cold"} +!12 = !{i64 -1415475215210681400, i64 -3483158674395044949} +!13 = !{!14, !"notcold"} +!14 = !{i64 -1415475215210681400, i64 -7799663586031895603} +!15 = !{i64 -1415475215210681400} +!16 = !{i64 -2441057035866683071} +!17 = !{i64 -3483158674395044949} +!18 = !{i64 -7799663586031895603} + + +;; Originally we create a single clone of each call to new from E, since each +;; allocates cold memory for a single caller. + +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; DUMP: AllocType 1 StackIds: 2 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Clones: [[ENEW1CLONE:0x[a-z0-9]+]] + +; DUMP: Node [[D:0x[a-z0-9]+]] +; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1 +; DUMP: Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6 +; DUMP: CallerEdges: + +; DUMP: Node [[C]] +; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Edge from Callee 
[[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[B]] +; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[ENEW2ORIG]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 2 +; DUMP: AllocType 2 StackIds: 1 +; DUMP: AllocType 1 StackIds: 0 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 4 6 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4 +; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6 +; DUMP: Clones: [[ENEW2CLONE]] + +; DUMP: Node [[ENEW1CLONE]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; DUMP: AllocType 1 StackIds: 2 +; DUMP: (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1 +; DUMP: Clone of [[ENEW1ORIG]] + +; DUMP: Node [[ENEW2CLONE]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 2 +; DUMP: AllocType 2 StackIds: 1 +; DUMP: AllocType 1 StackIds: 0 +; DUMP: (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 5 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5 +; DUMP: Clone of [[ENEW2ORIG]] |