aboutsummaryrefslogtreecommitdiff
path: root/mlir/lib
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2024-03-13 13:13:52 -0700
committerFangrui Song <i@maskray.me>2024-03-13 13:13:52 -0700
commit9ce8691dea8dadc1302abacf4302f3b805e1448d (patch)
treefdc2da3081156b4c9b80b0d417f090efadac946c /mlir/lib
parent795e3c3d94da0a664642d4580d87c82c02d5eca4 (diff)
parent744a23f24b08e8b988b176173c433d64761e66b3 (diff)
downloadllvm-users/MaskRay/spr/main.llvm-objcopy-add-compress-sections.zip
llvm-users/MaskRay/spr/main.llvm-objcopy-add-compress-sections.tar.gz
llvm-users/MaskRay/spr/main.llvm-objcopy-add-compress-sections.tar.bz2
[𝘀𝗽𝗿] changes introduced through rebaseusers/MaskRay/spr/main.llvm-objcopy-add-compress-sections
Created using spr 1.3.5-bogner [skip ci]
Diffstat (limited to 'mlir/lib')
-rw-r--r--mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp5
-rw-r--r--mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp99
-rw-r--r--mlir/lib/Transforms/InlinerPass.cpp38
-rw-r--r--mlir/lib/Transforms/Utils/Inliner.cpp3
4 files changed, 115 insertions, 30 deletions
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 8a6980e..e7b899a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1209,7 +1209,7 @@ void ParallelOp::build(OpBuilder &builder, OperationState &state,
/*allocate_vars=*/ValueRange(), /*allocators_vars=*/ValueRange(),
/*reduction_vars=*/ValueRange(), /*reductions=*/nullptr,
/*proc_bind_val=*/nullptr, /*private_vars=*/ValueRange(),
- /*privatizers=*/nullptr);
+ /*privatizers=*/nullptr, /*byref=*/false);
state.addAttributes(attributes);
}
@@ -1674,7 +1674,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
/*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
/*reductions=*/nullptr, /*schedule_val=*/nullptr,
/*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
- /*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
+ /*simd_modifier=*/false, /*nowait=*/false, /*byref=*/false,
+ /*ordered_val=*/nullptr,
/*order_val=*/nullptr, /*inclusive=*/false);
state.addAttributes(attributes);
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index bef227f..5027f2a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -805,12 +805,12 @@ convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,
/// Allocate space for privatized reduction variables.
template <typename T>
static void
-allocReductionVars(T loop, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
- SmallVector<omp::ReductionDeclareOp> &reductionDecls,
- SmallVector<llvm::Value *> &privateReductionVariables,
- DenseMap<Value, llvm::Value *> &reductionVariableMap) {
+allocByValReductionVars(T loop, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
+ SmallVector<omp::ReductionDeclareOp> &reductionDecls,
+ SmallVector<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.restoreIP(allocaIP);
auto args =
@@ -863,6 +863,7 @@ static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
auto loop = cast<omp::WsLoopOp>(opInst);
+ const bool isByRef = loop.getByref();
// TODO: this should be in the op verifier instead.
if (loop.getLowerBound().empty())
return failure();
@@ -888,18 +889,17 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables;
DenseMap<Value, llvm::Value *> reductionVariableMap;
- allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls,
- privateReductionVariables, reductionVariableMap);
-
- // Store the mapping between reduction variables and their private copies on
- // ModuleTranslation stack. It can be then recovered when translating
- // omp.reduce operations in a separate call.
- LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
- moduleTranslation, reductionVariableMap);
+ if (!isByRef) {
+ allocByValReductionVars(loop, builder, moduleTranslation, allocaIP,
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap);
+ }
// Before the loop, store the initial values of reductions into reduction
// variables. Although this could be done after allocas, we don't want to mess
// up with the alloca insertion point.
+ MutableArrayRef<BlockArgument> reductionArgs =
+ loop.getRegion().getArguments().take_back(loop.getNumReductionVars());
for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
@@ -908,9 +908,31 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
return failure();
assert(phis.size() == 1 && "expected one value to be yielded from the "
"reduction neutral element declaration region");
- builder.CreateStore(phis[0], privateReductionVariables[i]);
+ if (isByRef) {
+ // Allocate reduction variable (which is a pointer to the real reduction
+ // variable allocated in the inlined region)
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], var);
+
+ privateReductionVariables.push_back(var);
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(loop.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest was handled in allocByValReductionVars
+ }
}
+ // Store the mapping between reduction variables and their private copies on
+ // ModuleTranslation stack. It can be then recovered when translating
+ // omp.reduce operations in a separate call.
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+ moduleTranslation, reductionVariableMap);
+
// Set up the source location value for OpenMP runtime.
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
@@ -1014,7 +1036,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
builder.SetInsertPoint(tempTerminator);
llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
- loop.getNowait());
+ loop.getNowait(), isByRef);
if (!contInsertPoint.getBlock())
return loop->emitOpError() << "failed to convert reductions";
auto nextInsertionPoint =
@@ -1068,6 +1090,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
OmpParallelOpConversionManager raii(opInst);
+ const bool isByRef = opInst.getByref();
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
// relying on captured variables.
@@ -1082,18 +1105,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Allocate reduction vars
SmallVector<llvm::Value *> privateReductionVariables;
DenseMap<Value, llvm::Value *> reductionVariableMap;
- allocReductionVars(opInst, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables,
- reductionVariableMap);
-
- // Store the mapping between reduction variables and their private copies on
- // ModuleTranslation stack. It can be then recovered when translating
- // omp.reduce operations in a separate call.
- LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
- moduleTranslation, reductionVariableMap);
+ if (!isByRef) {
+ allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP,
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap);
+ }
// Initialize reduction vars
builder.restoreIP(allocaIP);
+ MutableArrayRef<BlockArgument> reductionArgs =
+ opInst.getRegion().getArguments().take_back(
+ opInst.getNumReductionVars());
for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
if (failed(inlineConvertOmpRegions(
@@ -1104,9 +1126,32 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
"expected one value to be yielded from the "
"reduction neutral element declaration region");
builder.restoreIP(allocaIP);
- builder.CreateStore(phis[0], privateReductionVariables[i]);
+
+ if (isByRef) {
+ // Allocate reduction variable (which is a pointer to the real reduction
+ // variable allocated in the inlined region)
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], var);
+
+ privateReductionVariables.push_back(var);
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction init region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest is done in allocByValReductionVars
+ }
}
+ // Store the mapping between reduction variables and their private copies on
+ // ModuleTranslation stack. It can be then recovered when translating
+ // omp.reduce operations in a separate call.
+ LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
+ moduleTranslation, reductionVariableMap);
+
// Save the alloca insertion point on ModuleTranslation stack for use in
// nested regions.
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
@@ -1137,7 +1182,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
ompBuilder->createReductions(builder.saveIP(), allocaIP,
- reductionInfos, false);
+ reductionInfos, false, isByRef);
if (!contInsertPoint.getBlock()) {
bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
return;
diff --git a/mlir/lib/Transforms/InlinerPass.cpp b/mlir/lib/Transforms/InlinerPass.cpp
index c058e80..08d8dbf 100644
--- a/mlir/lib/Transforms/InlinerPass.cpp
+++ b/mlir/lib/Transforms/InlinerPass.cpp
@@ -24,6 +24,8 @@ namespace mlir {
#include "mlir/Transforms/Passes.h.inc"
} // namespace mlir
+#define DEBUG_TYPE "inliner-pass"
+
using namespace mlir;
/// This function implements the inliner optimization pipeline.
@@ -88,6 +90,35 @@ InlinerPass::InlinerPass(std::function<void(OpPassManager &)> defaultPipeline,
config.setOpPipelines(std::move(opPipelines));
}
+// Return true if the callee/caller op-count ratio (%) is within the threshold.
+static bool isProfitableToInline(const Inliner::ResolvedCall &resolvedCall,
+                                 unsigned inliningThreshold) {
+  Region *callerRegion = resolvedCall.sourceNode->getCallableRegion();
+  Region *calleeRegion = resolvedCall.targetNode->getCallableRegion();
+
+  // We should not get external nodes here, but just return true
+  // for now to preserve the original behavior of the inliner pass.
+  if (!callerRegion || !calleeRegion)
+    return true;
+
+  auto countOps = [](Region *region) {
+    unsigned count = 0;
+    region->walk([&](Operation *) { ++count; });
+    return count;
+  };
+
+  unsigned callerOps = countOps(callerRegion);
+
+  // A caller with no operations: inline, and avoid dividing by zero below.
+  if (callerOps == 0)
+    return true;
+
+  unsigned ratio = countOps(calleeRegion) * 100 / callerOps;
+  LLVM_DEBUG(llvm::dbgs() << "Callee / caller operation ratio (max: "
+                          << inliningThreshold << "%): " << ratio << "%\n");
+  return ratio <= inliningThreshold;
+}
+
void InlinerPass::runOnOperation() {
CallGraph &cg = getAnalysis<CallGraph>();
@@ -100,9 +131,14 @@ void InlinerPass::runOnOperation() {
return signalPassFailure();
}
+ // By default, assume that any inlining is profitable.
+ auto profitabilityCb = [=](const Inliner::ResolvedCall &call) {
+ return isProfitableToInline(call, inliningThreshold);
+ };
+
// Get an instance of the inliner.
Inliner inliner(op, cg, *this, getAnalysisManager(), runPipelineHelper,
- config);
+ config, profitabilityCb);
// Run the inlining.
if (failed(inliner.doInlining()))
diff --git a/mlir/lib/Transforms/Utils/Inliner.cpp b/mlir/lib/Transforms/Utils/Inliner.cpp
index f227ced..8acfc96 100644
--- a/mlir/lib/Transforms/Utils/Inliner.cpp
+++ b/mlir/lib/Transforms/Utils/Inliner.cpp
@@ -741,6 +741,9 @@ bool Inliner::Impl::shouldInline(ResolvedCall &resolvedCall) {
if (calleeHasMultipleBlocks && !callerRegionSupportsMultipleBlocks())
return false;
+ if (!inliner.isProfitableToInline(resolvedCall))
+ return false;
+
// Otherwise, inline.
return true;
}