aboutsummaryrefslogtreecommitdiff
path: root/mlir/test/Conversion/ArmSMEToLLVM
diff options
context:
space:
mode:
authorAndrzej Warzyński <andrzej.warzynski@arm.com>2024-05-09 16:51:32 +0100
committerGitHub <noreply@github.com>2024-05-09 16:51:32 +0100
commitb1cbf4a7c3174471690e20360c2adae173608ae0 (patch)
treea6b3eef466944193f9bd1e1b632833379a904dc9 /mlir/test/Conversion/ArmSMEToLLVM
parentfe0b7983a2f510cdede22cdf6c9227e32ded6a15 (diff)
downloadllvm-b1cbf4a7c3174471690e20360c2adae173608ae0.zip
llvm-b1cbf4a7c3174471690e20360c2adae173608ae0.tar.gz
llvm-b1cbf4a7c3174471690e20360c2adae173608ae0.tar.bz2
[mlir][ArmSME] Add comments in tile-spills-and-fills.mlir (#91450)
* adds comments in tile-spills-and-fills.mlir * adds comments in ArmSMEIntrinsicOps.td * updates test in tile-spills-and-fills.mlir not to return 2D scalable vectors (e.g. vector<[4]x[4]xf32>) - that's not supported and not needed for that test
Diffstat (limited to 'mlir/test/Conversion/ArmSMEToLLVM')
-rw-r--r--mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir48
1 files changed, 43 insertions, 5 deletions
diff --git a/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir b/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir
index 7a9e6b4..a9c1a65 100644
--- a/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir
+++ b/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir
@@ -72,17 +72,32 @@ func.func @use_too_many_tiles() {
// AFTER-LLVM-LOWERING-DAG: %[[C8:.*]] = arith.constant 8 : index
// AFTER-LLVM-LOWERING-DAG: %[[VSCALE:.*]] = vector.vscale
// AFTER-LLVM-LOWERING-DAG: %[[SVL_H:.*]] = arith.muli %[[VSCALE]], %[[C8]] : index
+
+/// 0. Create an in-memory tile.
+/// Note: 16 is an in-memory tile ID, i.e. a tile ID >= 16.
+
// AFTER-LLVM-LOWERING-DAG: %[[TILE_ALLOCA:.*]] = memref.alloca(%[[SVL_H]], %[[SVL_H]])
// AFTER-LLVM-LOWERING-SAME: {arm_sme.in_memory_tile_id = 16 : i32} : memref<?x?xi16>
//
// AFTER-LLVM-LOWERING-NOT: scf.for
-// Note: 17 is the mask for the 32-bit tile 0.
+
+/// 1. The following instruction corresponds to %0 after tile allocation
+/// Note: 17 is the mask for the 32-bit tile 0.
+
// AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 17 : i32}>
//
// AFTER-LLVM-LOWERING-NOT: scf.for
-// Note: 34 is the mask for the 32-bit tile 1.
+
+/// 2. The following instruction corresponds to %1 after tile allocation
+/// Note: 34 is the mask for the 32-bit tile 1.
+
// AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 34 : i32}>
-//
+
+/// 3. swap(<in-memory-tile>, tile 0).
+/// This can be interpreted as spilling %0 (the 32-bit tile 0), so that
+/// %2 can be allocated a tile (16-bit tile 0). Note that this is
+/// swapping vector<[8]x[8]xi16> rather than vector<[4]x[4]xi32>.
+
// AFTER-LLVM-LOWERING: scf.for
// AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_H]] step %[[C1]] {
// AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]]
@@ -92,8 +107,15 @@ func.func @use_too_many_tiles() {
// AFTER-LLVM-LOWERING-NEXT: "arm_sme.intr.ld1h.horiz"({{.*}}, %[[SLICE_PTR]], {{.*}}) <{tile_id = 0 : i32}>
// AFTER-LLVM-LOWERING-NEXT: vector.store %[[SLICE]], %[[TILE_ALLOCA]]
// AFTER-LLVM-LOWERING-NEXT: }
-// Note: 85 is the mask for the 16-bit tile 0.
+
+/// 4. The following instruction corresponds to %3 after tile allocation
+/// Note: 85 is the mask for the 16-bit tile 0.
+
// AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 85 : i32}>
+
+/// 5. swap(<in-memory-tile>, tile 0).
+/// This can be interpreted as restoring %0.
+
// AFTER-LLVM-LOWERING: scf.for
// AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_H]] step %[[C1]] {
// AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]]
@@ -116,7 +138,7 @@ func.func @very_excessive_spills(%memref : memref<?x?xf32>) -> vector<[4]x[4]xf3
%tile = arm_sme.get_tile : vector<[4]x[4]xf32>
%mask = vector.constant_mask [4] : vector<[4]xi1>
%loadSlice = arm_sme.load_tile_slice %memref[%c0, %c0], %mask, %tile, %c0 : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
- return %loadSlice : vector<[4]x[4]xf32>
+ "test.some_use"(%loadSlice) : (vector<[4]x[4]xf32>) -> ()
}
// AFTER-TILE-ALLOC-LABEL: @very_excessive_spills
// AFTER-TILE-ALLOC: arm_sme.get_tile
@@ -133,22 +155,38 @@ func.func @very_excessive_spills(%memref : memref<?x?xf32>) -> vector<[4]x[4]xf3
// AFTER-LLVM-LOWERING-DAG: %[[TILE_ALLOCA:.*]] = memref.alloca(%[[SVL_S]], %[[SVL_S]])
// AFTER-LLVM-LOWERING-SAME: {arm_sme.in_memory_tile_id = 16 : i32} : memref<?x?xf32>
//
+
+/// 1. Swap %useAllTiles and %tile - note that this will only swap one 32-bit
+/// tile (vector<[4]x[4]xf32>)
+
// AFTER-LLVM-LOWERING: scf.for
// AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_S]] step %[[C1]] {
// AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]]
// AFTER-LLVM-LOWERING: %[[BASE_PTR:.*]] = llvm.extractvalue %[[MEM_DESC]][1]
// AFTER-LLVM-LOWERING: %[[SLICE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]]
+/// Read ZA tile slice -> vector
// AFTER-LLVM-LOWERING: %[[SLICE:.*]] = "arm_sme.intr.read.horiz"{{.*}} <{tile_id = 0 : i32}>
+/// Load vector from memory -> ZA tile
// AFTER-LLVM-LOWERING-NEXT: "arm_sme.intr.ld1w.horiz"({{.*}}, %[[SLICE_PTR]], {{.*}}) <{tile_id = 0 : i32}>
+/// Store ZA tile slice in memory
// AFTER-LLVM-LOWERING-NEXT: vector.store %[[SLICE]], %[[TILE_ALLOCA]]
// AFTER-LLVM-LOWERING-NEXT: }
+
+/// 2. Load into %tile
// AFTER-LLVM-LOWERING: "arm_sme.intr.ld1w.horiz"{{.*}} <{tile_id = 0 : i32}>
+
+/// 3. Swap %useAllTiles and %tile - note that this will only swap one 32-bit
+/// tile (vector<[4]x[4]xf32>)
+
// AFTER-LLVM-LOWERING: scf.for
// AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_S]] step %[[C1]] {
// AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]]
// AFTER-LLVM-LOWERING: %[[BASE_PTR:.*]] = llvm.extractvalue %[[MEM_DESC]][1]
// AFTER-LLVM-LOWERING: %[[SLICE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]]
+/// Read ZA tile slice -> vector
// AFTER-LLVM-LOWERING: %[[SLICE:.*]] = "arm_sme.intr.read.horiz"{{.*}} <{tile_id = 0 : i32}>
+/// Load vector from memory -> ZA tile
// AFTER-LLVM-LOWERING-NEXT: "arm_sme.intr.ld1w.horiz"({{.*}}, %[[SLICE_PTR]], {{.*}}) <{tile_id = 0 : i32}>
+/// Store ZA tile slice in memory
// AFTER-LLVM-LOWERING-NEXT: vector.store %[[SLICE]], %[[TILE_ALLOCA]]
// AFTER-LLVM-LOWERING-NEXT: }