aboutsummaryrefslogtreecommitdiff
path: root/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
blob: 1775b5fa4a346acd4f690ab601dc49be22466a95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
// RUN: mlir-opt %s -test-vector-transfer-flatten-patterns -split-input-file | FileCheck %s
// RUN: mlir-opt %s -test-vector-transfer-flatten-patterns=target-vector-bitwidth=128 -split-input-file | FileCheck %s --check-prefix=CHECK-128B

func.func @transfer_read_dims_match_contiguous(
      %arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<5x4x3x2xi8> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0 : i8
    %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
      memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<5x4x3x2xi8>
    return %v : vector<5x4x3x2xi8>
}

// CHECK-LABEL: func @transfer_read_dims_match_contiguous
// CHECK-SAME:      %[[ARG:[0-9a-zA-Z]+]]: memref<5x4x3x2xi8
// CHECK:         %[[COLLAPSED:.+]] = memref.collapse_shape %[[ARG]] {{.}}[0, 1, 2, 3]
// CHECK:         %[[READ1D:.+]] = vector.transfer_read %[[COLLAPSED]]
// CHECK:         %[[VEC2D:.+]] = vector.shape_cast %[[READ1D]] : vector<120xi8> to vector<5x4x3x2xi8>
// CHECK:         return %[[VEC2D]]

// CHECK-128B-LABEL: func @transfer_read_dims_match_contiguous
//       CHECK-128B:   memref.collapse_shape

// -----

func.func @transfer_read_dims_match_contiguous_empty_stride(
    %arg : memref<5x4x3x2xi8>) -> vector<5x4x3x2xi8> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0 : i8
    %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
      memref<5x4x3x2xi8>, vector<5x4x3x2xi8>
    return %v : vector<5x4x3x2xi8>
}

// CHECK-LABEL: func @transfer_read_dims_match_contiguous_empty_stride(
// CHECK-SAME:    %[[ARG:[0-9a-zA-Z]+]]: memref<5x4x3x2xi8
// CHECK:         %[[COLLAPSED:.+]] = memref.collapse_shape %[[ARG]] {{.}}[0, 1, 2, 3]
// CHECK:         %[[READ1D:.+]] = vector.transfer_read %[[COLLAPSED]]
// CHECK:         %[[VEC2D:.+]] = vector.shape_cast %[[READ1D]] : vector<120xi8> to vector<5x4x3x2xi8>
// CHECK:         return %[[VEC2D]]

// CHECK-128B-LABEL: func @transfer_read_dims_match_contiguous_empty_stride(
//       CHECK-128B:   memref.collapse_shape

// -----

// The shape of the memref and the vector don't match, but the vector is a
// contiguous subset of the memref, so "flattenable".

func.func @transfer_read_dims_mismatch_contiguous(
      %arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<1x1x2x2xi8> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0 : i8
    %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
      memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<1x1x2x2xi8>
    return %v : vector<1x1x2x2xi8>
}

// CHECK-LABEL:   func.func @transfer_read_dims_mismatch_contiguous(
// CHECK-SAME:                                           %[[VAL_0:.*]]: memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<1x1x2x2xi8> {
// CHECK:           %[[VAL_1:.*]] = arith.constant 0 : i8
// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK:           %[[VAL_3:.*]] = memref.collapse_shape %[[VAL_0]] {{\[\[}}0, 1, 2, 3]] : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>> into memref<120xi8, strided<[1], offset: ?>>
// CHECK:           %[[VAL_4:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_2]]], %[[VAL_1]] {in_bounds = [true]} : memref<120xi8, strided<[1], offset: ?>>, vector<4xi8>
// CHECK:           %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<4xi8> to vector<1x1x2x2xi8>
// CHECK:           return %[[VAL_5]] : vector<1x1x2x2xi8>

// CHECK-128B-LABEL: func @transfer_read_dims_mismatch_contiguous(
//       CHECK-128B:   memref.collapse_shape

// -----

func.func @transfer_read_dims_mismatch_non_zero_indices(
                     %idx_1: index,
                     %idx_2: index,
                     %m_in: memref<1x43x4x6xi32>,
                     %m_out: memref<1x2x6xi32>) {
  %c0 = arith.constant 0 : index
  %c0_i32 = arith.constant 0 : i32
  %2 = vector.transfer_read %m_in[%c0, %idx_1, %idx_2, %c0], %c0_i32 {in_bounds = [true, true, true]} :
    memref<1x43x4x6xi32>, vector<1x2x6xi32>
  vector.transfer_write %2, %m_out[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
    vector<1x2x6xi32>, memref<1x2x6xi32>
  return
}

// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 * 43)>

// CHECK-LABEL:   func.func @transfer_read_dims_mismatch_non_zero_indices(
// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
// CHECK-SAME:      %[[M_IN:.*]]: memref<1x43x4x6xi32>,
// CHECK-SAME:      %[[M_OUT:.*]]: memref<1x2x6xi32>) {
// CHECK:           %[[C_0:.*]] = arith.constant 0 : i32
// CHECK:           %[[C_0_IDX:.*]] = arith.constant 0 : index
// CHECK:           %[[COLLAPSED_IN:.*]] = memref.collapse_shape %[[M_IN]] {{\[}}[0], [1, 2, 3]] : memref<1x43x4x6xi32> into memref<1x1032xi32>
// CHECK:           %[[COLLAPSED_IDX:.*]] = affine.apply #[[$ATTR_0]]()[%[[IDX_2]], %[[IDX_1]]]
// CHECK:           %[[READ:.*]] = vector.transfer_read %[[COLLAPSED_IN]][%[[C_0_IDX]], %[[COLLAPSED_IDX]]], %[[C_0]] {in_bounds = [true]} : memref<1x1032xi32>, vector<12xi32>
// CHECK:           %[[COLLAPSED_OUT:.*]] = memref.collapse_shape %[[M_OUT]] {{\[}}[0, 1, 2]] : memref<1x2x6xi32> into memref<12xi32>
// CHECK:           vector.transfer_write %[[READ]], %[[COLLAPSED_OUT]][%[[C_0_IDX]]] {in_bounds = [true]} : vector<12xi32>, memref<12xi32>

// CHECK-128B-LABEL: func @transfer_read_dims_mismatch_non_zero_indices(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

// The input memref has a dynamic trailing shape and hence is not flattened.
// TODO: This case could be supported via memref.dim

func.func @transfer_read_dims_mismatch_non_zero_indices_dynamic_shapes(
                     %idx_1: index,
                     %idx_2: index,
                     %m_in: memref<1x?x4x6xi32>,
                     %m_out: memref<1x2x6xi32>) {
  %c0 = arith.constant 0 : index
  %c0_i32 = arith.constant 0 : i32
  %2 = vector.transfer_read %m_in[%c0, %idx_1, %idx_2, %c0], %c0_i32 {in_bounds = [true, true, true]} :
    memref<1x?x4x6xi32>, vector<1x2x6xi32>
  vector.transfer_write %2, %m_out[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
    vector<1x2x6xi32>, memref<1x2x6xi32>
  return
}

// CHECK-LABEL:   func.func @transfer_read_dims_mismatch_non_zero_indices_dynamic_shapes(
// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
// CHECK-SAME:      %[[M_IN:.*]]: memref<1x?x4x6xi32>,
// CHECK-SAME:      %[[M_OUT:.*]]: memref<1x2x6xi32>) {
// CHECK:           %[[READ:.*]] = vector.transfer_read %[[M_IN]]{{.*}} : memref<1x?x4x6xi32>, vector<1x2x6xi32>
// CHECK:           %[[COLLAPSED:.*]] = memref.collapse_shape %[[M_OUT]]{{.*}} : memref<1x2x6xi32> into memref<12xi32>
// CHECK:           %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<1x2x6xi32> to vector<12xi32>
// CHECK:           vector.transfer_write %[[SC]], %[[COLLAPSED]]{{.*}} : vector<12xi32>, memref<12xi32>

// CHECK-128B-LABEL: func @transfer_read_dims_mismatch_non_zero_indices_dynamic_shapes(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @transfer_read_dims_mismatch_non_contiguous(
    %arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<2x1x2x2xi8> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0 : i8
    %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
      memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<2x1x2x2xi8>
    return %v : vector<2x1x2x2xi8>
}

// CHECK-LABEL: func.func @transfer_read_dims_mismatch_non_contiguous
// CHECK-NOT: memref.collapse_shape
// CHECK-NOT: vector.shape_cast

// CHECK-128B-LABEL: func @transfer_read_dims_mismatch_non_contiguous(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @transfer_read_dims_mismatch_non_contiguous_empty_stride(
    %arg : memref<5x4x3x2xi8>) -> vector<2x1x2x2xi8> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0 : i8
    %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
      memref<5x4x3x2xi8>, vector<2x1x2x2xi8>
    return %v : vector<2x1x2x2xi8>
}

// CHECK-LABEL: func.func @transfer_read_dims_mismatch_non_contiguous_empty_stride(
// CHECK-NOT: memref.collapse_shape
// CHECK-NOT: vector.shape_cast

// CHECK-128B-LABEL: func @transfer_read_dims_mismatch_non_contiguous_empty_stride(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @transfer_write_dims_match_contiguous(
      %arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, %vec : vector<5x4x3x2xi8>) {
    %c0 = arith.constant 0 : index
    vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
      vector<5x4x3x2xi8>, memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>
    return
}

// CHECK-LABEL: func @transfer_write_dims_match_contiguous(
// CHECK-SAME:      %[[ARG:[0-9a-zA-Z]+]]: memref<5x4x3x2xi8
// CHECK-SAME:      %[[VEC:[0-9a-zA-Z]+]]: vector<5x4x3x2xi8>
// CHECK-DAG:     %[[COLLAPSED:.+]] = memref.collapse_shape %[[ARG]] {{.}}[0, 1, 2, 3]{{.}} : memref<5x4x3x2xi8, {{.+}}> into memref<120xi8, {{.+}}>
// CHECK-DAG:     %[[VEC1D:.+]] = vector.shape_cast %[[VEC]] : vector<5x4x3x2xi8> to vector<120xi8>
// CHECK:         vector.transfer_write %[[VEC1D]], %[[COLLAPSED]]

// CHECK-128B-LABEL: func @transfer_write_dims_match_contiguous(
//       CHECK-128B:   memref.collapse_shape

// -----

func.func @transfer_write_dims_mismatch_contiguous(
      %arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, %vec : vector<1x1x2x2xi8>) {
    %c0 = arith.constant 0 : index
    vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
      vector<1x1x2x2xi8>, memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>
    return
}

// CHECK-LABEL:   func.func @transfer_write_dims_mismatch_contiguous
// CHECK-SAME:                                            %[[VAL_0:.*]]: memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>,
// CHECK-SAME:                                            %[[VAL_1:.*]]: vector<1x1x2x2xi8>) {
// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK:           %[[VAL_3:.*]] = memref.collapse_shape %[[VAL_0]] {{\[\[}}0, 1, 2, 3]] : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>> into memref<120xi8, strided<[1], offset: ?>>
// CHECK:           %[[VAL_4:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x1x2x2xi8> to vector<4xi8>
// CHECK:           vector.transfer_write %[[VAL_4]], %[[VAL_3]]{{\[}}%[[VAL_2]]] {in_bounds = [true]} : vector<4xi8>, memref<120xi8, strided<[1], offset: ?>>
// CHECK:           return
// CHECK:         }

// CHECK-128B-LABEL: func @transfer_write_dims_mismatch_contiguous(
//       CHECK-128B:   memref.collapse_shape

// -----

func.func @transfer_write_dims_mismatch_non_contiguous(
      %arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, %vec : vector<2x1x2x2xi8>) {
    %c0 = arith.constant 0 : index
    vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
      vector<2x1x2x2xi8>, memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>
    return
}

// CHECK-LABEL: func.func @transfer_write_dims_mismatch_non_contiguous
// CHECK-NOT: memref.collapse_shape
// CHECK-NOT: vector.shape_cast

// CHECK-128B-LABEL: func @transfer_write_dims_mismatch_non_contiguous(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @transfer_write_0d(%arg : memref<i8>, %vec : vector<i8>) {
      vector.transfer_write %vec, %arg[] : vector<i8>, memref<i8>
      return
}

// CHECK-LABEL: func.func @transfer_write_0d
// CHECK-NOT: memref.collapse_shape
// CHECK-NOT: vector.shape_cast

// CHECK-128B-LABEL: func @transfer_write_0d(
//   CHECK-128B-NOT:   memref.collapse_shape
//   CHECK-128B-NOT:   vector.shape_cast

// -----

func.func @transfer_read_0d(%arg : memref<i8>) -> vector<i8> {
      %cst = arith.constant 0 : i8
      %0 = vector.transfer_read %arg[], %cst : memref<i8>, vector<i8>
      return %0 : vector<i8>
}

// CHECK-LABEL: func.func @transfer_read_0d
// CHECK-NOT: memref.collapse_shape
// CHECK-NOT: vector.shape_cast

// CHECK-128B-LABEL: func @transfer_read_0d(
//   CHECK-128B-NOT:   memref.collapse_shape
//   CHECK-128B-NOT:   vector.shape_cast

// -----

func.func @transfer_read_flattenable_with_dynamic_dims_and_indices(%arg0 : memref<?x?x8x4xi8, strided<[?, 32, 4, 1], offset: ?>>, %arg1 : index, %arg2 : index) -> vector<8x4xi8> {
    %c0_i8 = arith.constant 0 : i8
    %c0 = arith.constant 0 : index
    %result = vector.transfer_read %arg0[%arg1, %arg2, %c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref<?x?x8x4xi8, strided<[?, 32, 4, 1], offset: ?>>, vector<8x4xi8>
    return %result : vector<8x4xi8>
}

// CHECK-LABEL: func @transfer_read_flattenable_with_dynamic_dims_and_indices
// CHECK-SAME:    %[[ARG0:.+]]: memref<?x?x8x4xi8, {{.+}}>, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index
// CHECK:       %[[C0_I8:.+]] = arith.constant 0 : i8
// CHECK:       %[[C0:.+]] = arith.constant 0 : index
// CHECK:       %[[COLLAPSED:.+]] = memref.collapse_shape %[[ARG0]] {{\[}}[0], [1], [2, 3]{{\]}}
// CHECK-SAME:    : memref<?x?x8x4xi8, {{.+}}> into memref<?x?x32xi8, {{.+}}>
// CHECK:       %[[VEC1D:.+]] = vector.transfer_read %[[COLLAPSED]]
// CHECK-SAME:    [%[[ARG1]], %[[ARG2]], %[[C0]]], %[[C0_I8]]
// CHECK-SAME:    {in_bounds = [true]}
// CHECK-SAME:    : memref<?x?x32xi8, {{.+}}>, vector<32xi8>
// CHECK:       %[[VEC2D:.+]] = vector.shape_cast %[[VEC1D]] : vector<32xi8> to vector<8x4xi8>
// CHECK:       return %[[VEC2D]] : vector<8x4xi8>

// CHECK-128B-LABEL: func @transfer_read_flattenable_with_dynamic_dims_and_indices(
//       CHECK-128B:   memref.collapse_shape

// -----

func.func @transfer_write_flattenable_with_dynamic_dims_and_indices(%vec : vector<8x4xi8>, %dst : memref<?x?x8x4xi8, strided<[?, 32, 4, 1], offset: ?>>, %arg1 : index, %arg2 : index) {
    %c0 = arith.constant 0 : index
    vector.transfer_write %vec, %dst[%arg1, %arg2, %c0, %c0] {in_bounds = [true, true]} : vector<8x4xi8>, memref<?x?x8x4xi8, strided<[?, 32, 4, 1], offset: ?>>
    return
}

// CHECK-LABEL: func @transfer_write_flattenable_with_dynamic_dims_and_indices
// CHECK-SAME:    %[[ARG0:.+]]: vector<8x4xi8>, %[[ARG1:.+]]: memref<?x?x8x4xi8, {{.+}}>, %[[ARG2:.+]]: index, %[[ARG3:.+]]: index
// CHECK:       %[[C0:.+]] = arith.constant 0 : index
// CHECK:       %[[COLLAPSED:.+]] = memref.collapse_shape %[[ARG1]] {{\[}}[0], [1], [2, 3]{{\]}}
// CHECK-SAME:    : memref<?x?x8x4xi8, {{.+}}> into memref<?x?x32xi8, {{.+}}>
// CHECK:       %[[VEC1D:.+]] = vector.shape_cast %[[ARG0]] : vector<8x4xi8> to vector<32xi8>
// CHECK:       vector.transfer_write %[[VEC1D]], %[[COLLAPSED]]
// CHECK-SAME:    [%[[ARG2]], %[[ARG3]], %[[C0]]]
// CHECK-SAME:    {in_bounds = [true]}
// CHECK-SAME:    : vector<32xi8>, memref<?x?x32xi8, {{.+}}>

// CHECK-128B-LABEL: func @transfer_write_flattenable_with_dynamic_dims_and_indices(
//       CHECK-128B:   memref.collapse_shape

// -----

func.func @transfer_read_flattenable_negative(
      %arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<2x2x2x2xi8> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0 : i8
    %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
      memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<2x2x2x2xi8>
    return %v : vector<2x2x2x2xi8>
}

// CHECK-LABEL: func @transfer_read_flattenable_negative
//       CHECK:   vector.transfer_read {{.*}} vector<2x2x2x2xi8>

// CHECK-128B-LABEL: func @transfer_read_flattenable_negative(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @transfer_read_flattenable_negative2(
      %arg : memref<5x4x3x2xi8, strided<[24, 8, 2, 1], offset: ?>>) -> vector<5x4x3x2xi8> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0 : i8
    %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
      memref<5x4x3x2xi8, strided<[24, 8, 2, 1], offset: ?>>, vector<5x4x3x2xi8>
    return %v : vector<5x4x3x2xi8>
}

// CHECK-LABEL: func @transfer_read_flattenable_negative2
//       CHECK:   vector.transfer_read {{.*}} vector<5x4x3x2xi8>

// CHECK-128B-LABEL: func @transfer_read_flattenable_negative2(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @fold_unit_dim_add_basic(%arg0 : vector<1x8xi32>) -> vector<1x8xi32> {
   %add = arith.addi %arg0, %arg0 : vector<1x8xi32>
   return %add : vector<1x8xi32>
}
// CHECK-LABEL:   func.func @fold_unit_dim_add_basic(
// CHECK-SAME:      %[[VAL_0:.*]]: vector<1x8xi32>) -> vector<1x8xi32> {
// CHECK:           %[[VAL_1:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x8xi32> to vector<8xi32>
// CHECK:           %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x8xi32> to vector<8xi32>
// CHECK:           %[[VAL_3:.*]] = arith.addi %[[VAL_1]], %[[VAL_2]] : vector<8xi32>
// CHECK:           %[[VAL_4:.*]] = vector.shape_cast %[[VAL_3]] : vector<8xi32> to vector<1x8xi32>
// CHECK:           return %[[VAL_4]] : vector<1x8xi32>

// CHECK-128B-LABEL: func @fold_unit_dim_add_basic(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @fold_unit_dim_add_leading_and_trailing(%arg0 : vector<1x8x1xi32>) -> vector<1x8x1xi32> {
   %add = arith.addi %arg0, %arg0 : vector<1x8x1xi32>
   return %add : vector<1x8x1xi32>
}
// CHECK-LABEL:   func.func @fold_unit_dim_add_leading_and_trailing(
// CHECK-SAME:      %[[VAL_0:.*]]: vector<1x8x1xi32>) -> vector<1x8x1xi32> {
// CHECK:           %[[VAL_1:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x8x1xi32> to vector<8xi32>
// CHECK:           %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x8x1xi32> to vector<8xi32>
// CHECK:           %[[VAL_3:.*]] = arith.addi %[[VAL_1]], %[[VAL_2]] : vector<8xi32>
// CHECK:           %[[VAL_4:.*]] = vector.shape_cast %[[VAL_3]] : vector<8xi32> to vector<1x8x1xi32>
// CHECK:           return %[[VAL_4]] : vector<1x8x1xi32>

// CHECK-128B-LABEL: func @fold_unit_dim_add_leading_and_trailing(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @fold_unit_dim_add(%arg0 : vector<8x1xi32>,
                             %arg1 : vector<1x8xi32>) -> vector<8xi32> {
   %sc_arg0 = vector.shape_cast %arg0 : vector<8x1xi32> to vector<1x8xi32>
   %add = arith.addi %sc_arg0, %arg1 : vector<1x8xi32>
   %res = vector.shape_cast %add : vector<1x8xi32> to vector<8xi32>
   return %res : vector<8xi32>
}

// CHECK-LABEL:   func.func @fold_unit_dim_add(
// CHECK-SAME:      %[[VAL_0:.*]]: vector<8x1xi32>,
// CHECK-SAME:      %[[VAL_1:.*]]: vector<1x8xi32>) -> vector<8xi32> {
// CHECK:           %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1xi32> to vector<8xi32>
// CHECK:           %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8xi32> to vector<8xi32>
// CHECK:           %[[VAL_4:.*]] = arith.addi %[[VAL_2]], %[[VAL_3]] : vector<8xi32>
// CHECK:           return %[[VAL_4]] : vector<8xi32>

// CHECK-128B-LABEL: func @fold_unit_dim_add(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @fold_unit_dim_mulf(%arg0 : vector<8x[2]x1xf32>,
                              %arg1 : vector<1x8x[2]xf32>) -> vector<8x[2]xf32> {
   %sc_arg0 = vector.shape_cast %arg0 : vector<8x[2]x1xf32> to vector<1x8x[2]xf32>
   %add = arith.mulf %sc_arg0, %arg1 : vector<1x8x[2]xf32>
   %res = vector.shape_cast %add : vector<1x8x[2]xf32> to vector<8x[2]xf32>
   return %res : vector<8x[2]xf32>
}

// CHECK-LABEL:   func.func @fold_unit_dim_mulf(
// CHECK-SAME:      %[[VAL_0:.*]]: vector<8x[2]x1xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: vector<1x8x[2]xf32>) -> vector<8x[2]xf32> {
// CHECK:           %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x[2]x1xf32> to vector<8x[2]xf32>
// CHECK:           %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8x[2]xf32> to vector<8x[2]xf32>
// CHECK:           %[[VAL_4:.*]] = arith.mulf %[[VAL_2]], %[[VAL_3]] : vector<8x[2]xf32>
// CHECK:           return %[[VAL_4]] : vector<8x[2]xf32>

// CHECK-128B-LABEL: func @fold_unit_dim_mulf(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

func.func @fold_unit_dim_sitofp(%arg0 : vector<8x[2]x1xi8>) -> vector<8x[2]xf32> {
   %sc_arg0 = vector.shape_cast %arg0 : vector<8x[2]x1xi8> to vector<1x8x[2]xi8>
   %add = arith.sitofp %sc_arg0 : vector<1x8x[2]xi8> to vector<1x8x[2]xf32>
   %res = vector.shape_cast %add : vector<1x8x[2]xf32> to vector<8x[2]xf32>
   return %res : vector<8x[2]xf32>
}

// CHECK-LABEL:   func.func @fold_unit_dim_sitofp(
// CHECK-SAME:      %[[VAL_0:.*]]: vector<8x[2]x1xi8>) -> vector<8x[2]xf32> {
// CHECK:           %[[VAL_1:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x[2]x1xi8> to vector<8x[2]xi8>
// CHECK:           %[[VAL_2:.*]] = arith.sitofp %[[VAL_1]] : vector<8x[2]xi8> to vector<8x[2]xf32>
// CHECK:           return %[[VAL_2]] : vector<8x[2]xf32>

// CHECK-128B-LABEL: func @fold_unit_dim_sitofp(
//   CHECK-128B-NOT:   memref.collapse_shape

// -----

// All shape casts are folded away

func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>,
                                   %arg1 : vector<8xi32>,
                                   %arg2 : vector<8xi32>) -> vector<8xi32> {
   %sc_arg0 = vector.shape_cast %arg0 : vector<8xi32> to vector<1x8xi32>
   %sc_arg1 = vector.shape_cast %arg1 : vector<8xi32> to vector<1x8xi32>
   %sc_arg2 = vector.shape_cast %arg2 : vector<8xi32> to vector<1x8xi32>
   %mul = arith.muli %sc_arg0, %sc_arg1 : vector<1x8xi32>
   %add = arith.addi %mul, %sc_arg2 : vector<1x8xi32>
   %res = vector.shape_cast %add : vector<1x8xi32> to vector<8xi32>
   return %res : vector<8xi32>
}

// CHECK-LABEL:   func.func @fold_unit_dims_entirely(
// CHECK-SAME:      %[[VAL_0:.*]]: vector<8xi32>, %[[VAL_1:.*]]: vector<8xi32>,
// CHECK-SAME:      %[[VAL_2:.*]]: vector<8xi32>) -> vector<8xi32> {
// CHECK:           %[[VAL_3:.*]] = arith.muli %[[VAL_0]], %[[VAL_1]] : vector<8xi32>
// CHECK:           %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_2]] : vector<8xi32>
// CHECK:           return %[[VAL_4]] : vector<8xi32>

// CHECK-128B-LABEL: func @fold_unit_dims_entirely(
//   CHECK-128B-NOT:   memref.collapse_shape