diff options
author | Mingming Liu <mingmingl@google.com> | 2024-03-27 11:57:07 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-27 11:57:07 -0700 |
commit | 2d641858fa44db315a42fa1b5ba43540f1ca1ea4 (patch) | |
tree | bc5dbf4f1c1897b39f3b9d422d83539a7b6e84b0 | |
parent | 2329fb29d141bc356e4b5b859ab290b02f0b3cf6 (diff) | |
download | llvm-2d641858fa44db315a42fa1b5ba43540f1ca1ea4.zip llvm-2d641858fa44db315a42fa1b5ba43540f1ca1ea4.tar.gz llvm-2d641858fa44db315a42fa1b5ba43540f1ca1ea4.tar.bz2 |
[nfc][PGO] Factor out profile scaling into a standalone helper function (#83780)
- Put the helper function in `ProfDataUtils.h/cpp`, which is already a
dependency of `Instructions.cpp`
- The helper function could be re-used to update profiles of
`InvokeInst` (in a follow-up pull request)
-rw-r--r-- | llvm/include/llvm/IR/ProfDataUtils.h | 3 | ||||
-rw-r--r-- | llvm/lib/IR/Instructions.cpp | 46 | ||||
-rw-r--r-- | llvm/lib/IR/ProfDataUtils.cpp | 48 | ||||
-rw-r--r-- | llvm/test/Transforms/Inline/update_invoke_prof.ll | 64 | ||||
-rw-r--r-- | llvm/test/Transforms/Inline/update_value_profile.ll | 81 |
5 files changed, 197 insertions, 45 deletions
diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 255fa2f..c089740 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights); /// a `prof` metadata reference to instruction `I`. void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights); +/// Scaling the profile data attached to 'I' using the ratio of S/T. +void scaleProfData(Instruction &I, uint64_t S, uint64_t T); + } // namespace llvm #endif diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index c2abe87..cec02e2 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -833,15 +833,6 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB, // of S/T. The meaning of "branch_weights" meta data for call instruction is // transfered to represent call count. void CallInst::updateProfWeight(uint64_t S, uint64_t T) { - auto *ProfileData = getMetadata(LLVMContext::MD_prof); - if (ProfileData == nullptr) - return; - - auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0)); - if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") && - !ProfDataName->getString().equals("VP"))) - return; - if (T == 0) { LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in " "div by 0. Ignoring. Likely the function " @@ -850,42 +841,7 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) { "with non-zero prof info."); return; } - - MDBuilder MDB(getContext()); - SmallVector<Metadata *, 3> Vals; - Vals.push_back(ProfileData->getOperand(0)); - APInt APS(128, S), APT(128, T); - if (ProfDataName->getString().equals("branch_weights") && - ProfileData->getNumOperands() > 0) { - // Using APInt::div may be expensive, but most cases should fit 64 bits. 
- APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)) - ->getValue() - .getZExtValue()); - Val *= APS; - Vals.push_back(MDB.createConstant( - ConstantInt::get(Type::getInt32Ty(getContext()), - Val.udiv(APT).getLimitedValue(UINT32_MAX)))); - } else if (ProfDataName->getString().equals("VP")) - for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) { - // The first value is the key of the value profile, which will not change. - Vals.push_back(ProfileData->getOperand(i)); - uint64_t Count = - mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1)) - ->getValue() - .getZExtValue(); - // Don't scale the magic number. - if (Count == NOMORE_ICP_MAGICNUM) { - Vals.push_back(ProfileData->getOperand(i + 1)); - continue; - } - // Using APInt::div may be expensive, but most cases should fit 64 bits. - APInt Val(128, Count); - Val *= APS; - Vals.push_back(MDB.createConstant( - ConstantInt::get(Type::getInt64Ty(getContext()), - Val.udiv(APT).getLimitedValue()))); - } - setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals)); + scaleProfData(*this, S, T); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index b1a10d0..dc86f42 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -190,4 +190,52 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) { I.setMetadata(LLVMContext::MD_prof, BranchWeights); } +void scaleProfData(Instruction &I, uint64_t S, uint64_t T) { + assert(T != 0 && "Caller should guarantee"); + auto *ProfileData = I.getMetadata(LLVMContext::MD_prof); + if (ProfileData == nullptr) + return; + + auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0)); + if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") && + !ProfDataName->getString().equals("VP"))) + return; + + LLVMContext &C = I.getContext(); + + MDBuilder MDB(C); + 
SmallVector<Metadata *, 3> Vals; + Vals.push_back(ProfileData->getOperand(0)); + APInt APS(128, S), APT(128, T); + if (ProfDataName->getString().equals("branch_weights") && + ProfileData->getNumOperands() > 0) { + // Using APInt::div may be expensive, but most cases should fit 64 bits. + APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)) + ->getValue() + .getZExtValue()); + Val *= APS; + Vals.push_back(MDB.createConstant(ConstantInt::get( + Type::getInt32Ty(C), Val.udiv(APT).getLimitedValue(UINT32_MAX)))); + } else if (ProfDataName->getString().equals("VP")) + for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) { + // The first value is the key of the value profile, which will not change. + Vals.push_back(ProfileData->getOperand(i)); + uint64_t Count = + mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1)) + ->getValue() + .getZExtValue(); + // Don't scale the magic number. + if (Count == NOMORE_ICP_MAGICNUM) { + Vals.push_back(ProfileData->getOperand(i + 1)); + continue; + } + // Using APInt::div may be expensive, but most cases should fit 64 bits. + APInt Val(128, Count); + Val *= APS; + Vals.push_back(MDB.createConstant(ConstantInt::get( + Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue()))); + } + I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals)); +} + } // namespace llvm diff --git a/llvm/test/Transforms/Inline/update_invoke_prof.ll b/llvm/test/Transforms/Inline/update_invoke_prof.ll new file mode 100644 index 0000000..5f09c7c --- /dev/null +++ b/llvm/test/Transforms/Inline/update_invoke_prof.ll @@ -0,0 +1,64 @@ +; A pre-commit test to show that branch weights and value profiles associated with invoke are not updated. +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s + +declare i32 @__gxx_personality_v0(...) 
+ +define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 { + call void @callee(ptr %func), !prof !16 + ret void +} + +declare void @inner_callee(ptr %func) + +define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 { + invoke void %func() + to label %next unwind label %lpad, !prof !18 + +next: + invoke void @inner_callee(ptr %func) + to label %ret unwind label %lpad, !prof !19 + +lpad: + %exn = landingpad {ptr, i32} + cleanup + unreachable + +ret: + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 1000} +!17 = !{!"function_entry_count", i32 1500} +!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600} +!19 = !{!"branch_weights", i32 1500} + +; CHECK-LABEL: @caller( +; CHECK: invoke void %func( +; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]] +; CHECK: invoke void @inner_callee( +; CHECK-NEXT: {{.*}} !prof ![[PROF2:[0-9]+]] + +; CHECK-LABL: @callee( +; CHECK: invoke void %func( +; CHECK-NEXT: {{.*}} !prof ![[PROF1]] +; CHECK: invoke void @inner_callee( +; CHECK-NEXT: {{.*}} !prof ![[PROF2]] + +; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600} +; CHECK: ![[PROF2]] = !{!"branch_weights", i32 1500} diff --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll new file mode 100644 index 0000000..daa95e9 --- /dev/null +++ b/llvm/test/Transforms/Inline/update_value_profile.ll @@ 
-0,0 +1,81 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; When 'callee' is inlined into caller1 and caller2, the indirect call value +; profiles of the inlined copy should be scaled based on callers' profiles, +; and the indirect call value profiles in 'callee' should be updated. +define i32 @callee(ptr %0, i32 %1) !prof !20 { +; CHECK-LABEL: define i32 @callee( +; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] { +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %3 = load ptr, ptr %0 + %5 = getelementptr inbounds i8, ptr %3, i64 8 + %6 = load ptr, ptr %5 + %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17 + ret i32 %7 +} + +define i32 @caller1(i32 %0) !prof !18 { +; CHECK-LABEL: define i32 @caller1( +; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %2 = tail call ptr @_Z10createTypei(i32 %0) + %3 = tail call i32 @callee(ptr %2, i32 %0) + ret i32 %3 +} + +define i32 @caller2(i32 %0) !prof !19 { +; CHECK-LABEL: define i32 @caller2( +; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = tail 
call ptr @_Z10createTypei(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %2 = tail call ptr @_Z10createTypei(i32 %0) + %3 = tail call i32 @callee(ptr %2, i32 %0) + ret i32 %3 +} + +declare ptr @_Z10createTypei(i32) + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!17 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600} +!18 = !{!"function_entry_count", i64 1000} +!19 = !{!"function_entry_count", i64 600} +!20 = !{!"function_entry_count", i64 1700} +;. +; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100} +; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35} +; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352} +; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600} +; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211} +;. |