aboutsummaryrefslogtreecommitdiff
path: root/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp')
-rw-r--r--mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp445
1 files changed, 383 insertions, 62 deletions
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index a173cf1..5672942 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/ADT/bit.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/InterleavedRange.h"
#include <cstddef>
#include <iterator>
#include <optional>
@@ -77,6 +78,232 @@ struct LLVMPointerPointerLikeModel
};
} // namespace
+/// Generate a name of a canonical loop nest of the format
+/// `<prefix>(_r<idx>_s<idx>)*`. Hereby, `_r<idx>` identifies the region
+/// argument index of an operation that has multiple regions, if the operation
+/// has multiple regions.
+/// `_s<idx>` identifies the position of an operation within a region, where
+/// only operations that may potentially contain loops ("container operations"
+/// i.e. have region arguments) are counted. Again, it is omitted if there is
+/// only one such operation in a region. If there are canonical loops nested
+/// inside each other, also may also use the format `_d<num>` where <num> is the
+/// nesting depth of the loop.
+///
+/// The generated name is a best-effort to make canonical loop unique within an
+/// SSA namespace. This also means that regions with IsolatedFromAbove property
+/// do not consider any parents or siblings.
+static std::string generateLoopNestingName(StringRef prefix,
+ CanonicalLoopOp op) {
+ struct Component {
+ /// If true, this component describes a region operand of an operation (the
+ /// operand's owner) If false, this component describes an operation located
+ /// in a parent region
+ bool isRegionArgOfOp;
+ bool skip = false;
+ bool isUnique = false;
+
+ size_t idx;
+ Operation *op;
+ Region *parentRegion;
+ size_t loopDepth;
+
+ Operation *&getOwnerOp() {
+ assert(isRegionArgOfOp && "Must describe a region operand");
+ return op;
+ }
+ size_t &getArgIdx() {
+ assert(isRegionArgOfOp && "Must describe a region operand");
+ return idx;
+ }
+
+ Operation *&getContainerOp() {
+ assert(!isRegionArgOfOp && "Must describe a operation of a region");
+ return op;
+ }
+ size_t &getOpPos() {
+ assert(!isRegionArgOfOp && "Must describe a operation of a region");
+ return idx;
+ }
+ bool isLoopOp() const {
+ assert(!isRegionArgOfOp && "Must describe a operation of a region");
+ return isa<CanonicalLoopOp>(op);
+ }
+ Region *&getParentRegion() {
+ assert(!isRegionArgOfOp && "Must describe a operation of a region");
+ return parentRegion;
+ }
+ size_t &getLoopDepth() {
+ assert(!isRegionArgOfOp && "Must describe a operation of a region");
+ return loopDepth;
+ }
+
+ void skipIf(bool v = true) { skip = skip || v; }
+ };
+
+ // List of ancestors, from inner to outer.
+ // Alternates between
+ // * region argument of an operation
+ // * operation within a region
+ SmallVector<Component> components;
+
+ // Gather a list of parent regions and operations, and the position within
+ // their parent
+ Operation *o = op.getOperation();
+ while (o) {
+ // Operation within a region
+ Region *r = o->getParentRegion();
+ if (!r)
+ break;
+
+ llvm::ReversePostOrderTraversal<Block *> traversal(&r->getBlocks().front());
+ size_t idx = 0;
+ bool found = false;
+ size_t sequentialIdx = -1;
+ bool isOnlyContainerOp = true;
+ for (Block *b : traversal) {
+ for (Operation &op : *b) {
+ if (&op == o && !found) {
+ sequentialIdx = idx;
+ found = true;
+ }
+ if (op.getNumRegions()) {
+ idx += 1;
+ if (idx > 1)
+ isOnlyContainerOp = false;
+ }
+ if (found && !isOnlyContainerOp)
+ break;
+ }
+ }
+
+ Component &containerOpInRegion = components.emplace_back();
+ containerOpInRegion.isRegionArgOfOp = false;
+ containerOpInRegion.isUnique = isOnlyContainerOp;
+ containerOpInRegion.getContainerOp() = o;
+ containerOpInRegion.getOpPos() = sequentialIdx;
+ containerOpInRegion.getParentRegion() = r;
+
+ Operation *parent = r->getParentOp();
+
+ // Region argument of an operation
+ Component &regionArgOfOperation = components.emplace_back();
+ regionArgOfOperation.isRegionArgOfOp = true;
+ regionArgOfOperation.isUnique = true;
+ regionArgOfOperation.getArgIdx() = 0;
+ regionArgOfOperation.getOwnerOp() = parent;
+
+ // The IsolatedFromAbove trait of the parent operation implies that each
+ // individual region argument has its own separate namespace, so no
+ // ambiguity.
+ if (!parent || parent->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+ break;
+
+ // Component only needed if operation has multiple region operands. Region
+ // arguments may be optional, but we currently do not consider this.
+ if (parent->getRegions().size() > 1) {
+ auto getRegionIndex = [](Operation *o, Region *r) {
+ for (auto [idx, region] : llvm::enumerate(o->getRegions())) {
+ if (&region == r)
+ return idx;
+ }
+ llvm_unreachable("Region not child of its parent operation");
+ };
+ regionArgOfOperation.isUnique = false;
+ regionArgOfOperation.getArgIdx() = getRegionIndex(parent, r);
+ }
+
+ // next parent
+ o = parent;
+ }
+
+ // Determine whether a region-argument component is not needed
+ for (Component &c : components)
+ c.skipIf(c.isRegionArgOfOp && c.isUnique);
+
+ // Find runs of nested loops and determine each loop's depth in the loop nest
+ size_t numSurroundingLoops = 0;
+ for (Component &c : llvm::reverse(components)) {
+ if (c.skip)
+ continue;
+
+ // non-skipped multi-argument operands interrupt the loop nest
+ if (c.isRegionArgOfOp) {
+ numSurroundingLoops = 0;
+ continue;
+ }
+
+ // Multiple loops in a region means each of them is the outermost loop of a
+ // new loop nest
+ if (!c.isUnique)
+ numSurroundingLoops = 0;
+
+ c.getLoopDepth() = numSurroundingLoops;
+
+ // Next loop is surrounded by one more loop
+ if (isa<CanonicalLoopOp>(c.getContainerOp()))
+ numSurroundingLoops += 1;
+ }
+
+ // In loop nests, skip all but the innermost loop that contains the depth
+ // number
+ bool isLoopNest = false;
+ for (Component &c : components) {
+ if (c.skip || c.isRegionArgOfOp)
+ continue;
+
+ if (!isLoopNest && c.getLoopDepth() >= 1) {
+ // Innermost loop of a loop nest of at least two loops
+ isLoopNest = true;
+ } else if (isLoopNest) {
+ // Non-innermost loop of a loop nest
+ c.skipIf(c.isUnique);
+
+ // If there is no surrounding loop left, this must have been the outermost
+ // loop; leave loop-nest mode for the next iteration
+ if (c.getLoopDepth() == 0)
+ isLoopNest = false;
+ }
+ }
+
+ // Skip non-loop unambiguous regions (but they should interrupt loop nests, so
+ // we mark them as skipped only after computing loop nests)
+ for (Component &c : components)
+ c.skipIf(!c.isRegionArgOfOp && c.isUnique &&
+ !isa<CanonicalLoopOp>(c.getContainerOp()));
+
+ // Components can be skipped if they are already disambiguated by their parent
+ // (or does not have a parent)
+ bool newRegion = true;
+ for (Component &c : llvm::reverse(components)) {
+ c.skipIf(newRegion && c.isUnique);
+
+ // non-skipped components disambiguate unique children
+ if (!c.skip)
+ newRegion = true;
+
+ // ...except canonical loops that need a suffix for each nest
+ if (!c.isRegionArgOfOp && c.getContainerOp())
+ newRegion = false;
+ }
+
+ // Compile the nesting name string
+ SmallString<64> Name{prefix};
+ llvm::raw_svector_ostream NameOS(Name);
+ for (auto &c : llvm::reverse(components)) {
+ if (c.skip)
+ continue;
+
+ if (c.isRegionArgOfOp)
+ NameOS << "_r" << c.getArgIdx();
+ else if (c.getLoopDepth() >= 1)
+ NameOS << "_d" << c.getLoopDepth();
+ else
+ NameOS << "_s" << c.getOpPos();
+ }
+
+ return NameOS.str().str();
+}
+
void OpenMPDialect::initialize() {
addOperations<
#define GET_OP_LIST
@@ -3159,6 +3386,9 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
Value result = getResult();
auto [newCli, gen, cons] = decodeCli(result);
+ // Structured binding `gen` cannot be captured in lambdas before C++20
+ OpOperand *generator = gen;
+
// Derive the CLI variable name from its generator:
// * "canonloop" for omp.canonical_loop
// * custom name for loop transformation generatees
@@ -3172,71 +3402,29 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
cliName =
TypeSwitch<Operation *, std::string>(gen->getOwner())
.Case([&](CanonicalLoopOp op) {
- // Find the canonical loop nesting: For each ancestor add a
- // "+_r<idx>" suffix (in reverse order)
- SmallVector<std::string> components;
- Operation *o = op.getOperation();
- while (o) {
- if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
- break;
-
- Region *r = o->getParentRegion();
- if (!r)
- break;
-
- auto getSequentialIndex = [](Region *r, Operation *o) {
- llvm::ReversePostOrderTraversal<Block *> traversal(
- &r->getBlocks().front());
- size_t idx = 0;
- for (Block *b : traversal) {
- for (Operation &op : *b) {
- if (&op == o)
- return idx;
- // Only consider operations that are containers as
- // possible children
- if (!op.getRegions().empty())
- idx += 1;
- }
- }
- llvm_unreachable("Operation not part of the region");
- };
- size_t sequentialIdx = getSequentialIndex(r, o);
- components.push_back(("s" + Twine(sequentialIdx)).str());
-
- Operation *parent = r->getParentOp();
- if (!parent)
- break;
-
- // If the operation has more than one region, also count in
- // which of the regions
- if (parent->getRegions().size() > 1) {
- auto getRegionIndex = [](Operation *o, Region *r) {
- for (auto [idx, region] :
- llvm::enumerate(o->getRegions())) {
- if (&region == r)
- return idx;
- }
- llvm_unreachable("Region not child its parent operation");
- };
- size_t regionIdx = getRegionIndex(parent, r);
- components.push_back(("r" + Twine(regionIdx)).str());
- }
-
- // next parent
- o = parent;
- }
-
- SmallString<64> Name("canonloop");
- for (const std::string &s : reverse(components)) {
- Name += '_';
- Name += s;
- }
-
- return Name;
+ return generateLoopNestingName("canonloop", op);
})
.Case([&](UnrollHeuristicOp op) -> std::string {
llvm_unreachable("heuristic unrolling does not generate a loop");
})
+ .Case([&](TileOp op) -> std::string {
+ auto [generateesFirst, generateesCount] =
+ op.getGenerateesODSOperandIndexAndLength();
+ unsigned firstGrid = generateesFirst;
+ unsigned firstIntratile = generateesFirst + generateesCount / 2;
+ unsigned end = generateesFirst + generateesCount;
+ unsigned opnum = generator->getOperandNumber();
+ // In the OpenMP apply and looprange clauses, indices are 1-based
+ if (firstGrid <= opnum && opnum < firstIntratile) {
+ unsigned gridnum = opnum - firstGrid + 1;
+ return ("grid" + Twine(gridnum)).str();
+ }
+ if (firstIntratile <= opnum && opnum < end) {
+ unsigned intratilenum = opnum - firstIntratile + 1;
+ return ("intratile" + Twine(intratilenum)).str();
+ }
+ llvm_unreachable("Unexpected generatee argument");
+ })
.Default([&](Operation *op) {
assert(false && "TODO: Custom name for this operation");
return "transformed";
@@ -3323,7 +3511,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
OpAsmSetValueNameFn setNameFn) {
- setNameFn(region.getArgument(0), "iv");
+ std::string ivName = generateLoopNestingName("iv", *this);
+ setNameFn(region.getArgument(0), ivName);
}
void CanonicalLoopOp::print(OpAsmPrinter &p) {
@@ -3465,6 +3654,138 @@ UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() {
}
//===----------------------------------------------------------------------===//
+// TileOp
+//===----------------------------------------------------------------------===//
+
+static void printLoopTransformClis(OpAsmPrinter &p, TileOp op,
+ OperandRange generatees,
+ OperandRange applyees) {
+ if (!generatees.empty())
+ p << '(' << llvm::interleaved(generatees) << ')';
+
+ if (!applyees.empty())
+ p << " <- (" << llvm::interleaved(applyees) << ')';
+}
+
+static ParseResult parseLoopTransformClis(
+ OpAsmParser &parser,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &generateesOperands,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &applyeesOperands) {
+ if (parser.parseOptionalLess()) {
+ // Syntax 1: generatees present
+
+ if (parser.parseOperandList(generateesOperands,
+ mlir::OpAsmParser::Delimiter::Paren))
+ return failure();
+
+ if (parser.parseLess())
+ return failure();
+ } else {
+ // Syntax 2: generatees omitted
+ }
+
+ // Parse `<-` (`<` has already been parsed)
+ if (parser.parseMinus())
+ return failure();
+
+ if (parser.parseOperandList(applyeesOperands,
+ mlir::OpAsmParser::Delimiter::Paren))
+ return failure();
+
+ return success();
+}
+
+LogicalResult TileOp::verify() {
+ if (getApplyees().empty())
+ return emitOpError() << "must apply to at least one loop";
+
+ if (getSizes().size() != getApplyees().size())
+ return emitOpError() << "there must be one tile size for each applyee";
+
+ if (!getGeneratees().empty() &&
+ 2 * getSizes().size() != getGeneratees().size())
+ return emitOpError()
+ << "expecting two times the number of generatees than applyees";
+
+ DenseSet<Value> parentIVs;
+
+ Value parent = getApplyees().front();
+ for (auto &&applyee : llvm::drop_begin(getApplyees())) {
+ auto [parentCreate, parentGen, parentCons] = decodeCli(parent);
+ auto [create, gen, cons] = decodeCli(applyee);
+
+ if (!parentGen)
+ return emitOpError() << "applyee CLI has no generator";
+
+ auto parentLoop = dyn_cast_or_null<CanonicalLoopOp>(parentGen->getOwner());
+ if (!parentGen)
+ return emitOpError()
+ << "currently only supports omp.canonical_loop as applyee";
+
+ parentIVs.insert(parentLoop.getInductionVar());
+
+ if (!gen)
+ return emitOpError() << "applyee CLI has no generator";
+ auto loop = dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner());
+ if (!loop)
+ return emitOpError()
+ << "currently only supports omp.canonical_loop as applyee";
+
+ // Canonical loop must be perfectly nested, i.e. the body of the parent must
+ // only contain the omp.canonical_loop of the nested loops, and
+ // omp.terminator
+ bool isPerfectlyNested = [&]() {
+ auto &parentBody = parentLoop.getRegion();
+ if (!parentBody.hasOneBlock())
+ return false;
+ auto &parentBlock = parentBody.getBlocks().front();
+
+ auto nestedLoopIt = parentBlock.begin();
+ if (nestedLoopIt == parentBlock.end() ||
+ (&*nestedLoopIt != loop.getOperation()))
+ return false;
+
+ auto termIt = std::next(nestedLoopIt);
+ if (termIt == parentBlock.end() || !isa<TerminatorOp>(termIt))
+ return false;
+
+ if (std::next(termIt) != parentBlock.end())
+ return false;
+
+ return true;
+ }();
+ if (!isPerfectlyNested)
+ return emitOpError() << "tiled loop nest must be perfectly nested";
+
+ if (parentIVs.contains(loop.getTripCount()))
+ return emitOpError() << "tiled loop nest must be rectangular";
+
+ parent = applyee;
+ }
+
+ // TODO: The tile sizes must be computed before the loop, but checking this
+ // requires dominance analysis. For instance:
+ //
+ // %canonloop = omp.new_cli
+ // omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // // write to %x
+ // omp.terminator
+ // }
+ // %ts = llvm.load %x
+ // omp.tile <- (%canonloop) sizes(%ts : i32)
+
+ return success();
+}
+
+std::pair<unsigned, unsigned> TileOp ::getApplyeesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_applyees);
+}
+
+std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_generatees);
+}
+
+//===----------------------------------------------------------------------===//
// Critical construct (2.17.1)
//===----------------------------------------------------------------------===//