From ba62715a93a1a864b0ef8fd79468ae2b0714269f Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Tue, 26 Mar 2024 21:12:35 +0000 Subject: add an overlapping example for createDesc. --- mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 15 +++++++++++++++ mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 24 ++++++++++++------------ mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 4 ++-- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 41fe0ea..a031a75 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -406,13 +406,28 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> { elements accessed for each offset, default is 1. Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64] + ``` %a = memref.alloc() : memref<1024xf32> %1 = xegpu.create_tdesc %a[0, 16, 32, 64]: memref<1024xf32> -> TensorDesc<4xf32> + ``` Example 2. It assumes subgroup size is 4, and each workitem access 8 elements. It will access totally 32 data elements: a[0:7], a[16:23], a[32:39], a[64:71] + ``` %0 = memref.alloc() : memref<1024xf32> %1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> + ``` + + Example 3. It is similar to Example 2, but there is some overlaps among workitems. + It accesses: a[0:7], a[4:11], a[8:15], a[12:19] + ``` + %0 = memref.alloc() : memref<1024xf32> + %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> + ``` + + + + }]; let arguments = (ins XeGPU_BaseAddrType: $source, diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 0c62e51..4cd4e54 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -34,10 +34,10 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", [ShapedTypeInterface], "::mlir::TensorType"> { let summary = "TensorDesc describing regions of interested data."; let description = [{ - TensorDesc is a type designed to describe regions of the interested data as well as some - features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, - it essentially only contains the meta data, and doesn't hold the data by itself. It is designed - to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. + TensorDesc is a type designed to describe regions of the interested data as well as some + features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, + it essentially only contains the meta data, and doesn't hold the data by itself. It is designed + to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. It encodes the following information: * shape: the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows @@ -46,15 +46,15 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", is set or not. * element_type: the data type of the data element, e.g., f16, f32. - Similar to the builtin tensor, it also provides an optinal attribute to encoding + Similar to the builtin tensor, it also provides an optinal attribute to encoding the following information via the TensorDescAttr object: - * memory_scope (xegpu::MemoryScope): [optional] where the data is located, + * memory_scope (xegpu::MemoryScope): [optional] where the data is located, global memory or shared memory. It is default to Global. * array_length (int): [optional] The number of contiguous blocks with size as `shape`, that will be loaded by block load at a time. It is default to 1. - * boundary_check (bool): [optional] indicates whether the operation detects the boundary + * boundary_check (bool): [optional] indicates whether the operation detects the boundary and pads with zero for out-of-boundary access. It is default to do boundary check. - + Syntax: @@ -85,8 +85,8 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", OptionalParameter<"mlir::Attribute">: $encoding); let builders = [ - TypeBuilder<(ins - "llvm::ArrayRef": $shape, + TypeBuilderWithInferredContext<(ins + "llvm::ArrayRef": $shape, "mlir::Type": $elementType, CArg<"bool", "false">: $scattered, CArg<"int", "1">: $array_length, @@ -127,7 +127,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", if (attr && attr.getArrayLength()) return attr.getArrayLength().getInt(); // return default value - return 1; + return 1; } bool getBoundaryCheck() { @@ -148,7 +148,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", }]; let hasCustomAssemblyFormat = true; - + } #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 858cda3..24719fe 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -107,11 +107,11 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const { printer << ">"; } -TensorDescType TensorDescType::get(mlir::MLIRContext *context, - llvm::ArrayRef shape, +TensorDescType TensorDescType::get(llvm::ArrayRef shape, mlir::Type elementType, bool scattered, int array_length, MemoryScope memory_scope, bool boundary_check) { + auto context = elementType.getContext(); auto attr = TensorDescAttr::get(context, memory_scope, array_length, boundary_check, scattered); return Base::get(context, shape, elementType, attr); -- cgit v1.1