diff options
author | Dmitry Chigarev <dmitry.chigarev@intel.com> | 2025-09-25 20:31:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-09-25 11:31:17 -0700 |
commit | 04258fe3b15c9ecf78848c9b1470e1048844989e (patch) | |
tree | 9d6c07a62ea54d78f60ed35a6ce6f9aa9ea5da4f /llvm/unittests/Support/CommandLineTest.cpp | |
parent | ca61a9d96027a34750149b856ffba746999b4e16 (diff) | |
download | llvm-04258fe3b15c9ecf78848c9b1470e1048844989e.zip llvm-04258fe3b15c9ecf78848c9b1470e1048844989e.tar.gz llvm-04258fe3b15c9ecf78848c9b1470e1048844989e.tar.bz2 |
[mlir][XeGPU][XeGPUUnroll] Support new syntax with offsets moved to load_nd/store_nd/prefetch_nd (#160323)
Adds support for new syntax in XeGPUUnroll for:
1. `create_nd_desc` without offsets
2. `load_nd` with offsets
3. `store_nd` with offsets
4. `prefetch_nd` with offsets
`create_nd_desc with offsets` + `load_nd with offsets` won't be lowered
correctly. In this case the IR would still have two unrealized
conversions that will fail later in the pipeline.
The offsets computation for the unrolled tile is now moved from
descriptors to load/store/prefetch operations. The resulted IR now has
one single descriptor that is being iterated in load/store/prefetch ops.
<details><summary>old/new behavior examples</summary>
```mlir
// before unroll pass:
gpu.func @load_nd(%src: memref<256x318xf32>) -> vector<24x32xf32> {
%tdesc = xegpu.create_nd_tdesc %src : memref<256x318xf32> -> !xegpu.tensor_desc<24x32xf32, #xegpu.layout<inst_data = [8, 16]>>
%ld = xegpu.load_nd %tdesc[8, 16]: !xegpu.tensor_desc<24x32xf32, #xegpu.layout<inst_data = [8, 16]>> -> vector<24x32xf32>
gpu.return %ld : vector<24x32xf32>
}
// after unroll pass (offsets in create_nd_desc):
gpu.func @create_nd_tdesc2(%arg0: memref<256x318xf32>) -> vector<24x32xf32> {
%cst = arith.constant dense<0.000000e+00> : vector<24x32xf32>
%c24 = arith.constant 24 : index
%c32 = arith.constant 32 : index
%c8 = arith.constant 8 : index
%c16 = arith.constant 16 : index
// create 6 descriptors for each tile
%0 = xegpu.create_nd_tdesc %arg0[%c8, %c16] : memref<256x318xf32> -> !xegpu.tensor_desc<8x16xf32>
%1 = xegpu.create_nd_tdesc %arg0[%c8, %c32] : memref<256x318xf32> -> !xegpu.tensor_desc<8x16xf32>
%2 = xegpu.create_nd_tdesc %arg0[%c16, %c16] : memref<256x318xf32> -> !xegpu.tensor_desc<8x16xf32>
%3 = xegpu.create_nd_tdesc %arg0[%c16, %c32] : memref<256x318xf32> -> !xegpu.tensor_desc<8x16xf32>
%4 = xegpu.create_nd_tdesc %arg0[%c24, %c16] : memref<256x318xf32> -> !xegpu.tensor_desc<8x16xf32>
%5 = xegpu.create_nd_tdesc %arg0[%c24, %c32] : memref<256x318xf32> -> !xegpu.tensor_desc<8x16xf32>
%6 = xegpu.load_nd %0 : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%7 = xegpu.load_nd %1 : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%8 = xegpu.load_nd %2 : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%9 = xegpu.load_nd %3 : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%10 = xegpu.load_nd %4 : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%11 = xegpu.load_nd %5 : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
...
}
// after unroll pass (offsets in load_nd):
gpu.func @load_nd(%arg0: memref<256x318xf32>) -> vector<24x32xf32> {
%cst = arith.constant dense<0.000000e+00> : vector<24x32xf32>
%c24 = arith.constant 24 : index
%c32 = arith.constant 32 : index
%c16 = arith.constant 16 : index
%c8 = arith.constant 8 : index
// create only one descriptor with proper tile shape
%0 = xegpu.create_nd_tdesc %arg0 : memref<256x318xf32> -> !xegpu.tensor_desc<8x16xf32>
// compute tile offsets at the operation (using only one descriptor)
%1 = xegpu.load_nd %0[%c8, %c16] : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%2 = xegpu.load_nd %0[%c8, %c32] : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%3 = xegpu.load_nd %0[%c16, %c16] : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%4 = xegpu.load_nd %0[%c16, %c32] : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%5 = xegpu.load_nd %0[%c24, %c16] : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
%6 = xegpu.load_nd %0[%c24, %c32] : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
...
}
```
</details>
---------
Signed-off-by: dchigarev <dmitry.chigarev@intel.com>
Diffstat (limited to 'llvm/unittests/Support/CommandLineTest.cpp')
0 files changed, 0 insertions, 0 deletions