From 04a2bca6ee0fbea6a9dc84f59e8bf4a41f8ae230 Mon Sep 17 00:00:00 2001
From: mingmingl
Date: Sun, 3 Mar 2024 22:16:03 -0800
Subject: [Inline] Update value profile for non-call instructions

---
 llvm/include/llvm/IR/ProfDataUtils.h               |  6 ++
 llvm/lib/IR/ProfDataUtils.cpp                      | 37 +++++++++
 llvm/lib/Transforms/Utils/InlineFunction.cpp       | 26 ++++++-
 .../test/Transforms/Inline/update_value_profile.ll | 89 ++++++++++++++++++++++
 4 files changed, 155 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/update_value_profile.ll

diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index 255fa2f..2010c4b 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -108,5 +108,11 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
 /// a `prof` metadata reference to instruction `I`.
 void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);
 
+/// Returns a copy of the value profile metadata \p ProfData with each count
+/// scaled by the ratio S/T. Counts equal to NOMORE_ICP_MAGICNUM are kept as
+/// is. Returns nullptr if \p ProfData is null, and \p ProfData itself if
+/// \p T is zero.
+MDNode *scaleValueProfile(const MDNode *ProfData, uint64_t S, uint64_t T);
+
 } // namespace llvm
 #endif
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index dcb057c..db91a66 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -190,4 +190,41 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
   I.setMetadata(LLVMContext::MD_prof, BranchWeights);
 }
 
+MDNode *scaleValueProfile(const MDNode *ProfData, uint64_t S, uint64_t T) {
+  if (ProfData == nullptr)
+    return nullptr;
+  // A zero denominator carries no usable ratio and would make the APInt
+  // division below divide by zero. Keep the profile as is.
+  if (T == 0)
+    return const_cast<MDNode *>(ProfData);
+  assert(cast<MDString>(ProfData->getOperand(0))->getString() == "VP" &&
+         "Expects value profile metadata");
+  LLVMContext &C = ProfData->getContext();
+  MDBuilder MDB(C);
+  APInt APS(128, S), APT(128, T);
+
+  SmallVector<Metadata *> Vals;
+  Vals.push_back(ProfData->getOperand(0));
+  // Operands after the "VP" tag are visited in pairs: the first of each pair
+  // is copied unchanged and the second is a count that is scaled.
+  for (unsigned i = 1; i < ProfData->getNumOperands(); i += 2) {
+    Vals.push_back(ProfData->getOperand(i));
+    uint64_t Count =
+        mdconst::extract<ConstantInt>(ProfData->getOperand(i + 1))
+            ->getValue()
+            .getZExtValue();
+    // Don't scale the magic number.
+    if (Count == NOMORE_ICP_MAGICNUM) {
+      Vals.push_back(ProfData->getOperand(i + 1));
+      continue;
+    }
+    // Using APInt::div may be expensive, but most cases should fit 64 bits.
+    APInt Val(128, Count);
+    Val *= APS;
+    Vals.push_back(MDB.createConstant(ConstantInt::get(
+        Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue())));
+  }
+  return MDNode::get(C, Vals);
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index d4d4bf5..7cc1641 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
@@ -30,8 +31,8 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
-#include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
@@ -55,6 +56,7 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -1910,9 +1912,18 @@ void llvm::updateProfileCallee(
   if (VMap) {
     uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
     for (auto Entry : *VMap)
+      // FIXME: Update the profiles for invoke instruction after inline
       if (isa<CallInst>(Entry.first))
-        if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+        if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {
           CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
+          Instruction *VPtr =
+              PGOIndirectCallVisitor::tryGetVTableInstruction(CI);
+          if (VPtr)
+            VPtr->setMetadata(
+                LLVMContext::MD_prof,
+                scaleValueProfile(VPtr->getMetadata(LLVMContext::MD_prof),
+                                  CloneEntryCount, PriorEntryCount));
+        }
   }
 
   if (EntryDelta) {
@@ -1922,8 +1933,17 @@ void llvm::updateProfileCallee(
       // No need to update the callsite if it is pruned during inlining.
       if (!VMap || VMap->count(&BB))
         for (Instruction &I : BB)
-          if (CallInst *CI = dyn_cast<CallInst>(&I))
+          // FIXME: Update the profiles for invoke instruction after inline
+          if (CallInst *CI = dyn_cast<CallInst>(&I)) {
             CI->updateProfWeight(NewEntryCount, PriorEntryCount);
+            Instruction *VPtr =
+                PGOIndirectCallVisitor::tryGetVTableInstruction(CI);
+            if (VPtr)
+              VPtr->setMetadata(
+                  LLVMContext::MD_prof,
+                  scaleValueProfile(VPtr->getMetadata(LLVMContext::MD_prof),
+                                    NewEntryCount, PriorEntryCount));
+          }
   }
 }
 
diff --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll
new file mode 100644
index 0000000..ae59a2d
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_value_profile.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;@_ZTV4Base = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base3keyEv, ptr @_ZN4Base4funcEi] }
+;@_ZTV7Derived = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base3keyEv, ptr @_ZN7Derived4funcEi] }
+
+define i32 @callee(ptr %0, i32 %1) !prof !20 {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8, !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %3 = load ptr, ptr %0, !prof !21
+  %5 = getelementptr inbounds i8, ptr %3, i64 8
+  %6 = load ptr, ptr %5
+  %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
+  ret i32 %7
+}
+
+define i32 @caller1(i32 %0) !prof !18 {
+; CHECK-LABEL: define i32 @caller1(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF3:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF4:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+define i32 @caller2(i32 %0) !prof !19 {
+; CHECK-LABEL: define i32 @caller2(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF6:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF7:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF8:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+declare ptr @_Z10createTypei(i32)
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+;!15 = !{i64 16, !"_ZTS4Base"}
+;!16= !{i64 16, !"_ZTS7Derived"}
+!17 = !{!"VP", i32 0, i64 1600, i64 15186643663281959480, i64 1000, i64 15101948577241817854, i64 600}
+!18 = !{!"function_entry_count", i64 1000}
+!19 = !{!"function_entry_count", i64 600}
+!20 = !{!"function_entry_count", i64 1700}
+!21 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 13870436605473471591, i64 600}
+
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
+; CHECK: [[PROF1]] = !{!"VP", i32 2, i64 94, i64 1960855528937986108, i64 58, i64 -4576307468236080025, i64 35}
+; CHECK: [[PROF2]] = !{!"VP", i32 0, i64 94, i64 -3260100410427592136, i64 58, i64 -3344795496467733762, i64 35}
+; CHECK: [[PROF3]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF4]] = !{!"VP", i32 2, i64 941, i64 1960855528937986108, i64 588, i64 -4576307468236080025, i64 352}
+; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 941, i64 -3260100410427592136, i64 588, i64 -3344795496467733762, i64 352}
+; CHECK: [[PROF6]] = !{!"function_entry_count", i64 600}
+; CHECK: [[PROF7]] = !{!"VP", i32 2, i64 564, i64 1960855528937986108, i64 352, i64 -4576307468236080025, i64 211}
+; CHECK: [[PROF8]] = !{!"VP", i32 0, i64 564, i64 -3260100410427592136, i64 352, i64 -3344795496467733762, i64 211}
+;.
-- 
cgit v1.1