diff options
author | Guray Ozen <guray.ozen@gmail.com> | 2025-08-08 16:34:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-08 16:34:18 +0200 |
commit | 76a533c8ecbc57895c6df53a20ef7eae4e242a54 (patch) | |
tree | 1007ea94b883220853ceb7d47fb683abd4834ee6 | |
parent | 2c4b876fa842ca8bd87ed2abc028ebf3605a8b32 (diff) | |
download | llvm-76a533c8ecbc57895c6df53a20ef7eae4e242a54.zip llvm-76a533c8ecbc57895c6df53a20ef7eae4e242a54.tar.gz llvm-76a533c8ecbc57895c6df53a20ef7eae4e242a54.tar.bz2 |
[MLIR][NVVM] Add pmevent (#152509)
Add the nvvm.pmevent op, which triggers one or more of a fixed number of
performance monitor events, with the event index or mask specified by an
immediate operand.
For more information, see the [PTX ISA documentation for pmevent](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent).
-rw-r--r-- | mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 38 | ||||
-rw-r--r-- | mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 20 | ||||
-rw-r--r-- | mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 14 | ||||
-rw-r--r-- | mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 19 | ||||
-rw-r--r-- | mlir/test/Target/LLVMIR/nvvmir.mlir | 11 |
5 files changed, 102 insertions, 0 deletions
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 30df3b7..8d50726 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -402,6 +402,44 @@ def NVVM_ReduxOp : } //===----------------------------------------------------------------------===// +// NVVM Performance Monitor events +//===----------------------------------------------------------------------===// + +def NVVM_PMEventOp : NVVM_PTXBuilder_Op<"pmevent">, + Arguments<(ins OptionalAttr<I16Attr>:$maskedEventId, + OptionalAttr<I32Attr>:$eventId)> { + let summary = "Trigger one or more Performance Monitor events."; + + let description = [{ + Triggers one or more of a fixed number of performance monitor events, with + event index or mask specified by immediate operand. + + Without `mask` it triggers a single performance monitor event indexed by + immediate operand a, in the range 0..15. + + With `mask` it triggers one or more of the performance monitor events. Each + bit in the 16-bit immediate operand controls an event. + + [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent) + }]; + + string llvmBuilder = [{ + llvm::Value *mId = builder.getInt16(* $maskedEventId); + createIntrinsicCall(builder, llvm::Intrinsic::nvvm_pm_event_mask, {mId}); + }]; + + let assemblyFormat = "attr-dict (`id` `=` $eventId^)? 
(`mask` `=` $maskedEventId^)?"; + + let extraClassDeclaration = [{ + bool hasIntrinsic() { return !getEventId(); } + }]; + let extraClassDefinition = [{ + std::string $cppClass::getPtx() { return std::string("pmevent %0;"); } + }]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// // NVVM Split arrive/wait barrier //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index e0977f5..7ad429e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -189,6 +189,26 @@ LogicalResult BulkStoreOp::verify() { return success(); } +LogicalResult PMEventOp::verify() { + auto eventId = getEventId(); + auto maskedEventId = getMaskedEventId(); + if (!maskedEventId && !eventId) { + return emitOpError() << "either `id` or `mask` must be set"; + } + + if (maskedEventId && eventId) { + return emitOpError() << "`id` and `mask` cannot be set at the same time"; + } + + if (eventId) { + if (eventId < 0 || eventId > 15) { + return emitOpError() << "`id` must be between 0 and 15"; + } + } + + return llvm::success(); +} + // Given the element type of an operand and whether or not it is an accumulator, // this function returns the PTX type (`NVVM::MMATypes`) that corresponds to the // operand's element type. 
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 580b09d..e505767 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -681,3 +681,17 @@ llvm.func @ex2(%input : f32, %pred : i1) { %1 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input), predicate = %pred : f32, i1 -> f32 llvm.return } + +// ----- + +// CHECK-LABEL: @nvvm_pmevent +llvm.func @nvvm_pmevent() { + // CHECK: %[[S0:.+]] = llvm.mlir.constant(10 : i32) : i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "pmevent $0;", "n" %[[S0]] : (i32) -> () + + nvvm.pmevent id = 10 + // CHECK: %[[S1:.+]] = llvm.mlir.constant(4 : i32) : i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "pmevent $0;", "n" %[[S1]] : (i32) -> () + nvvm.pmevent id = 4 + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 85478cc..991222c 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -1,5 +1,24 @@ // RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s +llvm.func @pmevent_no_id() { + // expected-error @below {{either `id` or `mask` must be set}} + nvvm.pmevent +} + +// ----- + +llvm.func @pmevent_bigger15() { + // expected-error @below {{`id` must be between 0 and 15}} + nvvm.pmevent id = 141 +} + +// ----- + +llvm.func @pmevent_many_ids() { + // expected-error @below {{`id` and `mask` cannot be set at the same time}} + nvvm.pmevent id = 1 mask = 1 +} + // ----- llvm.func @kernel_func(%numberOfThreads : i32) { diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 5c2cfa4..b1800e8 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -918,3 +918,14 @@ llvm.func @nvvm_dot_accumulate_2way(%a: vector<2xi16>, %b: vector<4xi8>, %c: i32 %7 = 
nvvm.dot.accumulate.2way %a <signed>, %b <signed>, %c {b_hi = true}: vector<2xi16>, vector<4xi8> llvm.return } + +// ----- + +// CHECK-LABEL: @nvvm_pmevent +llvm.func @nvvm_pmevent() { + // CHECK: call void @llvm.nvvm.pm.event.mask(i16 15000) + nvvm.pmevent mask = 15000 + // CHECK: call void @llvm.nvvm.pm.event.mask(i16 4) + nvvm.pmevent mask = 4 + llvm.return +} |