//===- IndexIntrinsicsOpLowering.h - GPU IndexOps Lowering class *- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/BuiltinAttributes.h"

#include <limits>

namespace mlir {
namespace gpu {
namespace index_lowering {

enum class IndexKind : uint32_t { Other = 0, Block = 1, Grid = 2 };

enum class IntrType : uint32_t {
  None = 0,
  Id = 1,
  Dim = 2,
};

// Rewrite pattern that replaces `Op` with `XOp`, `YOp`, or `ZOp` depending on
// the dimension `Op` operates on. `Op` is assumed to return an `index` value,
// while `XOp`, `YOp`, and `ZOp` are assumed to return an `llvm.i32` value.
// Depending on `indexBitwidth`, the result is sign-extended or truncated to
// match the bitwidth expected by the consumers of the value.
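//
// For example, a target backend would typically instantiate this pattern
// along the following lines (a sketch; the NVVM ops are one illustration of
// how a backend plugs its per-dimension intrinsics into this template):
//
//   patterns.add<gpu::index_lowering::OpLowering<
//       gpu::ThreadIdOp, NVVM::ThreadIdXOp, NVVM::ThreadIdYOp,
//       NVVM::ThreadIdZOp>>(converter, gpu::index_lowering::IndexKind::Block,
//                           gpu::index_lowering::IntrType::Id);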
template <typename Op, typename XOp, typename YOp, typename ZOp>
struct OpLowering : public ConvertOpToLLVMPattern<Op> {
private:
  unsigned indexBitwidth;
  IndexKind indexKind;
  IntrType intrType;

public:
  explicit OpLowering(const LLVMTypeConverter &typeConverter,
                      PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<Op>(typeConverter, benefit),
        indexBitwidth(typeConverter.getIndexTypeBitwidth()),
        indexKind(IndexKind::Other), intrType(IntrType::None) {}

  explicit OpLowering(const LLVMTypeConverter &typeConverter,
                      IndexKind indexKind, IntrType intrType,
                      PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern<Op>(typeConverter, benefit),
        indexBitwidth(typeConverter.getIndexTypeBitwidth()),
        indexKind(indexKind), intrType(intrType) {}

  // Replace the op with the intrinsic for the dimension it queries, attach a
  // range attribute when a bound is known, and adjust the result to the
  // converted index bitwidth.
  LogicalResult
  matchAndRewrite(Op op, typename Op::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    MLIRContext *context = rewriter.getContext();
    Operation *newOp;
    switch (op.getDimension()) {
    case gpu::Dimension::x:
      newOp = XOp::create(rewriter, loc, IntegerType::get(context, 32));
      break;
    case gpu::Dimension::y:
      newOp = YOp::create(rewriter, loc, IntegerType::get(context, 32));
      break;
    case gpu::Dimension::z:
      newOp = ZOp::create(rewriter, loc, IntegerType::get(context, 32));
      break;
    }

    // Order of priority for bounds:
    // 1. The upper_bound attribute
    // 2. Inherent attributes on a surrounding gpu.func
    // 3. Discardable attributes on a surrounding function of any kind
    // The code below handles these in reverse order so that more important
    // sources overwrite less important ones.
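    // For instance (illustrative): a surrounding gpu.func may carry the
    // inherent attribute `known_block_size = array<i32: 128, 1, 1>`, while an
    // ordinary function may carry the discardable dialect attribute
    // `gpu.known_block_size` with the same payload.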
    DenseI32ArrayAttr funcBounds = nullptr;
    if (auto funcOp = op->template getParentOfType<FunctionOpInterface>()) {
      switch (indexKind) {
      case IndexKind::Block: {
        auto blockHelper =
            gpu::GPUDialect::KnownBlockSizeAttrHelper(op.getContext());
        if (blockHelper.isAttrPresent(funcOp))
          funcBounds = blockHelper.getAttr(funcOp);
        break;
      }
      case IndexKind::Grid: {
        auto gridHelper =
            gpu::GPUDialect::KnownGridSizeAttrHelper(op.getContext());
        if (gridHelper.isAttrPresent(funcOp))
          funcBounds = gridHelper.getAttr(funcOp);
        break;
      }
      case IndexKind::Other:
        break;
      }
    }
    if (auto gpuFunc = op->template getParentOfType<gpu::GPUFuncOp>()) {
      switch (indexKind) {
      case IndexKind::Block:
        funcBounds = gpuFunc.getKnownBlockSizeAttr();
        break;
      case IndexKind::Grid:
        funcBounds = gpuFunc.getKnownGridSizeAttr();
        break;
      case IndexKind::Other:
        break;
      }
    }
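    // The op-level upper_bound, being the most specific source, overwrites
    // whatever the function attributes provided.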
    std::optional<int32_t> upperBound;
    if (funcBounds)
      upperBound =
          funcBounds.asArrayRef()[static_cast<uint32_t>(op.getDimension())];
    if (auto opBound = op.getUpperBound())
      upperBound = opBound->getZExtValue();
    if (upperBound && intrType != IntrType::None) {
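      // The range attribute is half-open: ids lie in [0, bound), so the
      // exclusive maximum is the bound itself; dims lie in [1, bound], so the
      // exclusive maximum is bound + 1 (saturating at INT32_MAX to avoid
      // overflow).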
      int32_t min = (intrType == IntrType::Dim ? 1 : 0);
      int32_t max = *upperBound == std::numeric_limits<int32_t>::max()
                        ? *upperBound
                        : *upperBound + (intrType == IntrType::Id ? 0 : 1);
      newOp->setAttr("range", LLVM::ConstantRangeAttr::get(
                                  rewriter.getContext(), 32, min, max));
    }
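    // The intrinsics all return i32; widen or narrow that result to the
    // index bitwidth the type converter selected.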
    if (indexBitwidth > 32) {
      newOp = LLVM::SExtOp::create(rewriter, loc,
                                   IntegerType::get(context, indexBitwidth),
                                   newOp->getResult(0));
    } else if (indexBitwidth < 32) {
      newOp = LLVM::TruncOp::create(rewriter, loc,
                                    IntegerType::get(context, indexBitwidth),
                                    newOp->getResult(0));
    }
    rewriter.replaceOp(op, newOp->getResults());
    return success();
  }
};
} // namespace index_lowering
} // namespace gpu
} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_