diff options
author | Noah Goldstein <goldstein.w.n@gmail.com> | 2024-03-13 13:13:52 -0700 |
---|---|---|
committer | Fangrui Song <i@maskray.me> | 2024-03-13 13:13:52 -0700 |
commit | 9ce8691dea8dadc1302abacf4302f3b805e1448d (patch) | |
tree | fdc2da3081156b4c9b80b0d417f090efadac946c /mlir/lib | |
parent | 795e3c3d94da0a664642d4580d87c82c02d5eca4 (diff) | |
parent | 744a23f24b08e8b988b176173c433d64761e66b3 (diff) | |
download | llvm-users/MaskRay/spr/main.llvm-objcopy-add-compress-sections.zip llvm-users/MaskRay/spr/main.llvm-objcopy-add-compress-sections.tar.gz llvm-users/MaskRay/spr/main.llvm-objcopy-add-compress-sections.tar.bz2 |
[𝘀𝗽𝗿] changes introduced through rebase (users/MaskRay/spr/main.llvm-objcopy-add-compress-sections)
Created using spr 1.3.5-bogner
[skip ci]
Diffstat (limited to 'mlir/lib')
-rw-r--r-- | mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 | ||||
-rw-r--r-- | mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 99 | ||||
-rw-r--r-- | mlir/lib/Transforms/InlinerPass.cpp | 38 | ||||
-rw-r--r-- | mlir/lib/Transforms/Utils/Inliner.cpp | 3 |
4 files changed, 115 insertions, 30 deletions
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 8a6980e..e7b899a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1209,7 +1209,7 @@ void ParallelOp::build(OpBuilder &builder, OperationState &state, /*allocate_vars=*/ValueRange(), /*allocators_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(), /*reductions=*/nullptr, /*proc_bind_val=*/nullptr, /*private_vars=*/ValueRange(), - /*privatizers=*/nullptr); + /*privatizers=*/nullptr, /*byref=*/false); state.addAttributes(attributes); } @@ -1674,7 +1674,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state, /*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(), /*reductions=*/nullptr, /*schedule_val=*/nullptr, /*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr, - /*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr, + /*simd_modifier=*/false, /*nowait=*/false, /*byref=*/false, + /*ordered_val=*/nullptr, /*order_val=*/nullptr, /*inclusive=*/false); state.addAttributes(attributes); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index bef227f..5027f2a 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -805,12 +805,12 @@ convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder, /// Allocate space for privatized reduction variables. 
template <typename T> static void -allocReductionVars(T loop, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, - SmallVector<omp::ReductionDeclareOp> &reductionDecls, - SmallVector<llvm::Value *> &privateReductionVariables, - DenseMap<Value, llvm::Value *> &reductionVariableMap) { +allocByValReductionVars(T loop, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVector<omp::ReductionDeclareOp> &reductionDecls, + SmallVector<llvm::Value *> &privateReductionVariables, + DenseMap<Value, llvm::Value *> &reductionVariableMap) { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.restoreIP(allocaIP); auto args = @@ -863,6 +863,7 @@ static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto loop = cast<omp::WsLoopOp>(opInst); + const bool isByRef = loop.getByref(); // TODO: this should be in the op verifier instead. if (loop.getLowerBound().empty()) return failure(); @@ -888,18 +889,17 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector<llvm::Value *> privateReductionVariables; DenseMap<Value, llvm::Value *> reductionVariableMap; - allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls, - privateReductionVariables, reductionVariableMap); - - // Store the mapping between reduction variables and their private copies on - // ModuleTranslation stack. It can be then recovered when translating - // omp.reduce operations in a separate call. 
- LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( - moduleTranslation, reductionVariableMap); + if (!isByRef) { + allocByValReductionVars(loop, builder, moduleTranslation, allocaIP, + reductionDecls, privateReductionVariables, + reductionVariableMap); + } // Before the loop, store the initial values of reductions into reduction // variables. Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. + MutableArrayRef<BlockArgument> reductionArgs = + loop.getRegion().getArguments().take_back(loop.getNumReductionVars()); for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) { SmallVector<llvm::Value *> phis; if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), @@ -908,9 +908,31 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, return failure(); assert(phis.size() == 1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); - builder.CreateStore(phis[0], privateReductionVariables[i]); + if (isByRef) { + // Allocate reduction variable (which is a pointer to the real reduction + // variable allocated in the inlined region) + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); + // Store the result of the inlined region to the allocated reduction var + // ptr + builder.CreateStore(phis[0], var); + + privateReductionVariables.push_back(var); + moduleTranslation.mapValue(reductionArgs[i], phis[0]); + reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]); + } else { + // for by-ref case the store is inside of the reduction region + builder.CreateStore(phis[0], privateReductionVariables[i]); + // the rest was handled in allocByValReductionVars + } } + // Store the mapping between reduction variables and their private copies on + // ModuleTranslation stack. It can be then recovered when translating + // omp.reduce operations in a separate call. 
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( + moduleTranslation, reductionVariableMap); + // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -1014,7 +1036,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(tempTerminator); llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, - loop.getNowait()); + loop.getNowait(), isByRef); if (!contInsertPoint.getBlock()) return loop->emitOpError() << "failed to convert reductions"; auto nextInsertionPoint = @@ -1068,6 +1090,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; OmpParallelOpConversionManager raii(opInst); + const bool isByRef = opInst.getByref(); // TODO: support error propagation in OpenMPIRBuilder and use it instead of // relying on captured variables. @@ -1082,18 +1105,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Allocate reduction vars SmallVector<llvm::Value *> privateReductionVariables; DenseMap<Value, llvm::Value *> reductionVariableMap; - allocReductionVars(opInst, builder, moduleTranslation, allocaIP, - reductionDecls, privateReductionVariables, - reductionVariableMap); - - // Store the mapping between reduction variables and their private copies on - // ModuleTranslation stack. It can be then recovered when translating - // omp.reduce operations in a separate call. 
- LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( - moduleTranslation, reductionVariableMap); + if (!isByRef) { + allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP, + reductionDecls, privateReductionVariables, + reductionVariableMap); + } // Initialize reduction vars builder.restoreIP(allocaIP); + MutableArrayRef<BlockArgument> reductionArgs = + opInst.getRegion().getArguments().take_back( + opInst.getNumReductionVars()); for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { SmallVector<llvm::Value *> phis; if (failed(inlineConvertOmpRegions( @@ -1104,9 +1126,32 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, "expected one value to be yielded from the " "reduction neutral element declaration region"); builder.restoreIP(allocaIP); - builder.CreateStore(phis[0], privateReductionVariables[i]); + + if (isByRef) { + // Allocate reduction variable (which is a pointer to the real reduciton + // variable allocated in the inlined region) + llvm::Value *var = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); + // Store the result of the inlined region to the allocated reduction var + // ptr + builder.CreateStore(phis[0], var); + + privateReductionVariables.push_back(var); + moduleTranslation.mapValue(reductionArgs[i], phis[0]); + reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]); + } else { + // for by-ref case the store is inside of the reduction init region + builder.CreateStore(phis[0], privateReductionVariables[i]); + // the rest is done in allocByValReductionVars + } } + // Store the mapping between reduction variables and their private copies on + // ModuleTranslation stack. It can be then recovered when translating + // omp.reduce operations in a separate call. 
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard( + moduleTranslation, reductionVariableMap); + // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( @@ -1137,7 +1182,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = ompBuilder->createReductions(builder.saveIP(), allocaIP, - reductionInfos, false); + reductionInfos, false, isByRef); if (!contInsertPoint.getBlock()) { bodyGenStatus = opInst->emitOpError() << "failed to convert reductions"; return; diff --git a/mlir/lib/Transforms/InlinerPass.cpp b/mlir/lib/Transforms/InlinerPass.cpp index c058e80..08d8dbf 100644 --- a/mlir/lib/Transforms/InlinerPass.cpp +++ b/mlir/lib/Transforms/InlinerPass.cpp @@ -24,6 +24,8 @@ namespace mlir { #include "mlir/Transforms/Passes.h.inc" } // namespace mlir +#define DEBUG_TYPE "inliner-pass" + using namespace mlir; /// This function implements the inliner optimization pipeline. @@ -88,6 +90,35 @@ InlinerPass::InlinerPass(std::function<void(OpPassManager &)> defaultPipeline, config.setOpPipelines(std::move(opPipelines)); } +// Return true if the inlining ratio does not exceed the threshold. +static bool isProfitableToInline(const Inliner::ResolvedCall &resolvedCall, + unsigned inliningThreshold) { + Region *callerRegion = resolvedCall.sourceNode->getCallableRegion(); + Region *calleeRegion = resolvedCall.targetNode->getCallableRegion(); + + // We should not get external nodes here, but just return true + // for now to preserve the original behavior of the inliner pass. + if (!callerRegion || !calleeRegion) + return true; + + auto countOps = [](Region *region) { + unsigned count = 0; + region->walk([&](Operation *) { ++count; }); + return count; + }; + + unsigned callerOps = countOps(callerRegion); + + // An empty caller would divide by zero below; treat it as profitable. 
+ if (callerOps == 0) + return true; + + unsigned ratio = countOps(calleeRegion) * 100 / callerOps; + LLVM_DEBUG(llvm::dbgs() << "Callee / caller operation ratio (max: " + << inliningThreshold << "%): " << ratio << "%\n"); + return ratio <= inliningThreshold; +} + void InlinerPass::runOnOperation() { CallGraph &cg = getAnalysis<CallGraph>(); @@ -100,9 +131,14 @@ void InlinerPass::runOnOperation() { return signalPassFailure(); } + // By default, assume that any inlining is profitable. + auto profitabilityCb = [=](const Inliner::ResolvedCall &call) { + return isProfitableToInline(call, inliningThreshold); + }; + // Get an instance of the inliner. Inliner inliner(op, cg, *this, getAnalysisManager(), runPipelineHelper, - config); + config, profitabilityCb); // Run the inlining. if (failed(inliner.doInlining())) diff --git a/mlir/lib/Transforms/Utils/Inliner.cpp b/mlir/lib/Transforms/Utils/Inliner.cpp index f227ced..8acfc96 100644 --- a/mlir/lib/Transforms/Utils/Inliner.cpp +++ b/mlir/lib/Transforms/Utils/Inliner.cpp @@ -741,6 +741,9 @@ bool Inliner::Impl::shouldInline(ResolvedCall &resolvedCall) { if (calleeHasMultipleBlocks && !callerRegionSupportsMultipleBlocks()) return false; + if (!inliner.isProfitableToInline(resolvedCall)) + return false; + // Otherwise, inline. return true; } |