diff options
author | Andrzej Warzyński <andrzej.warzynski@arm.com> | 2024-05-09 16:51:32 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-09 16:51:32 +0100 |
commit | b1cbf4a7c3174471690e20360c2adae173608ae0 (patch) | |
tree | a6b3eef466944193f9bd1e1b632833379a904dc9 /mlir/test/Conversion/ArmSMEToLLVM | |
parent | fe0b7983a2f510cdede22cdf6c9227e32ded6a15 (diff) | |
download | llvm-b1cbf4a7c3174471690e20360c2adae173608ae0.zip llvm-b1cbf4a7c3174471690e20360c2adae173608ae0.tar.gz llvm-b1cbf4a7c3174471690e20360c2adae173608ae0.tar.bz2 |
[mlir][ArmSME] Add comments in tile-spills-and-fills.mlir (#91450)
* adds comments in tile-spills-and-fills.mlir
* adds comments in ArmSMEIntrinsicOps.td
* updates test in tile-spills-and-fills.mlir not to return 2D scalable
vectors (e.g. vector<[4]x[4]xf32>) - that's not supported and not
needed for that test
Diffstat (limited to 'mlir/test/Conversion/ArmSMEToLLVM')
-rw-r--r-- | mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir | 48 |
1 files changed, 43 insertions, 5 deletions
diff --git a/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir b/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir index 7a9e6b4..a9c1a65 100644 --- a/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir +++ b/mlir/test/Conversion/ArmSMEToLLVM/tile-spills-and-fills.mlir @@ -72,17 +72,32 @@ func.func @use_too_many_tiles() { // AFTER-LLVM-LOWERING-DAG: %[[C8:.*]] = arith.constant 8 : index // AFTER-LLVM-LOWERING-DAG: %[[VSCALE:.*]] = vector.vscale // AFTER-LLVM-LOWERING-DAG: %[[SVL_H:.*]] = arith.muli %[[VSCALE]], %[[C8]] : index + +/// 0. Create an in-memory-tile +/// Note: 16 is an in-memory tile ID, that is a tile ID >= 16 + // AFTER-LLVM-LOWERING-DAG: %[[TILE_ALLOCA:.*]] = memref.alloca(%[[SVL_H]], %[[SVL_H]]) // AFTER-LLVM-LOWERING-SAME: {arm_sme.in_memory_tile_id = 16 : i32} : memref<?x?xi16> // // AFTER-LLVM-LOWERING-NOT: scf.for -// Note: 17 is the mask for the 32-bit tile 0. + +/// 1. The following instruciton corresponds to %0 after tile allocation +/// Note: 17 is the mask for the 32-bit tile 0. + // AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 17 : i32}> // // AFTER-LLVM-LOWERING-NOT: scf.for -// Note: 34 is the mask for the 32-bit tile 1. + +/// 2. The following instruciton corresponds to %1 after tile allocation +/// Note: 34 is the mask for the 32-bit tile 1. + // AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 34 : i32}> -// + +/// 3. swap(<in-memory-tile>, tile 0). +/// This can be interpreted as spilling %0 (the 32-bit tile 0), so that +/// %2 can be allocated a tile (16 bit tile 0). Note that this is +/// swapping vector<[8]x[8]xi16> rather than vector<[4]x[4]xi32>. 
+ // AFTER-LLVM-LOWERING: scf.for // AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_H]] step %[[C1]] { // AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]] @@ -92,8 +107,15 @@ func.func @use_too_many_tiles() { // AFTER-LLVM-LOWERING-NEXT: "arm_sme.intr.ld1h.horiz"({{.*}}, %[[SLICE_PTR]], {{.*}}) <{tile_id = 0 : i32}> // AFTER-LLVM-LOWERING-NEXT: vector.store %[[SLICE]], %[[TILE_ALLOCA]] // AFTER-LLVM-LOWERING-NEXT: } -// Note: 85 is the mask for the 16-bit tile 0. + +/// 4. The following instruciton corresponds to %3 after tile allocation +/// Note: 85 is the mask for the 16-bit tile 0. + // AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 85 : i32}> + +/// 5. swap(<inMemoryTile>, tile 0) +/// This can be interpreted as restoring %0. + // AFTER-LLVM-LOWERING: scf.for // AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_H]] step %[[C1]] { // AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]] @@ -116,7 +138,7 @@ func.func @very_excessive_spills(%memref : memref<?x?xf32>) -> vector<[4]x[4]xf3 %tile = arm_sme.get_tile : vector<[4]x[4]xf32> %mask = vector.constant_mask [4] : vector<[4]xi1> %loadSlice = arm_sme.load_tile_slice %memref[%c0, %c0], %mask, %tile, %c0 : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32> - return %loadSlice : vector<[4]x[4]xf32> + "test.some_use"(%loadSlice) : (vector<[4]x[4]xf32>) -> () } // AFTER-TILE-ALLOC-LABEL: @very_excessive_spills // AFTER-TILE-ALLOC: arm_sme.get_tile @@ -133,22 +155,38 @@ func.func @very_excessive_spills(%memref : memref<?x?xf32>) -> vector<[4]x[4]xf3 // AFTER-LLVM-LOWERING-DAG: %[[TILE_ALLOCA:.*]] = memref.alloca(%[[SVL_S]], %[[SVL_S]]) // AFTER-LLVM-LOWERING-SAME: {arm_sme.in_memory_tile_id = 16 : i32} : memref<?x?xf32> // + +/// 1. 
Swap %useAllTiles and %tile - note that this will only swap one 32-bit +/// tile (vector<[4]x[4]xf32>) + // AFTER-LLVM-LOWERING: scf.for // AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_S]] step %[[C1]] { // AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]] // AFTER-LLVM-LOWERING: %[[BASE_PTR:.*]] = llvm.extractvalue %[[MEM_DESC]][1] // AFTER-LLVM-LOWERING: %[[SLICE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]] +// Read ZA tile slice -> vector // AFTER-LLVM-LOWERING: %[[SLICE:.*]] = "arm_sme.intr.read.horiz"{{.*}} <{tile_id = 0 : i32}> +/// Load vector from memory -> ZA tile // AFTER-LLVM-LOWERING-NEXT: "arm_sme.intr.ld1w.horiz"({{.*}}, %[[SLICE_PTR]], {{.*}}) <{tile_id = 0 : i32}> +/// Store ZA tile slice in memory // AFTER-LLVM-LOWERING-NEXT: vector.store %[[SLICE]], %[[TILE_ALLOCA]] // AFTER-LLVM-LOWERING-NEXT: } + +/// 2. Load into %tile // AFTER-LLVM-LOWERING: "arm_sme.intr.ld1w.horiz"{{.*}} <{tile_id = 0 : i32}> + +/// 3. Swap %useAllTiles and %tile - note that this will only swap one 32-bit +/// tile (vector<[4]x[4]xf32>) + // AFTER-LLVM-LOWERING: scf.for // AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_S]] step %[[C1]] { // AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]] // AFTER-LLVM-LOWERING: %[[BASE_PTR:.*]] = llvm.extractvalue %[[MEM_DESC]][1] // AFTER-LLVM-LOWERING: %[[SLICE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]] +/// Read ZA tile slice -> vector // AFTER-LLVM-LOWERING: %[[SLICE:.*]] = "arm_sme.intr.read.horiz"{{.*}} <{tile_id = 0 : i32}> +/// Load vector from memory -> ZA tile // AFTER-LLVM-LOWERING-NEXT: "arm_sme.intr.ld1w.horiz"({{.*}}, %[[SLICE_PTR]], {{.*}}) <{tile_id = 0 : i32}> +/// Store ZA tile slice in memory // AFTER-LLVM-LOWERING-NEXT: vector.store %[[SLICE]], %[[TILE_ALLOCA]] // AFTER-LLVM-LOWERING-NEXT: } |