aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Transforms/LowerMatrixIntrinsics/data-layout.ll
blob: 3d050140666c20d9e7091e971d7e322c925bcc93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='lower-matrix-intrinsics' -data-layout='p:64:64' -S < %s | FileCheck %s --check-prefix=PTR64
; RUN: opt -passes='lower-matrix-intrinsics' -data-layout='p:32:32' -S < %s | FileCheck %s --check-prefix=PTR32

; To properly support the matrix intrinsics on, e.g., 32-bit platforms (without
; the need to emit `libc` calls), we perform strided index calculations using
; the same pointer bit-width as the matrix pointers, as determined by the data
; layout. To verify this behaviour, this test runs several strided loads and
; stores through the lowering pass with (32|64)-bit pointers, and verifies the
; generated code extends / truncates strides accordingly. Similarly,
; `data-layout-multiply-fused.ll` adopts this approach to verify the same
; behaviour for index calculations emitted while lowering fused matrix
; multiplies.

define <9 x double> @strided_load_3x3_i64(ptr %in, i64 %stride) {
; PTR64-LABEL: @strided_load_3x3_i64(
; PTR64-NEXT:  entry:
; PTR64-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; PTR64-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN:%.*]], i64 [[VEC_START]]
; PTR64-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR64-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; PTR64-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i64 [[VEC_START1]]
; PTR64-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR64-NEXT:    [[VEC_START4:%.*]] = mul i64 2, [[STRIDE]]
; PTR64-NEXT:    [[VEC_GEP5:%.*]] = getelementptr double, ptr [[IN]], i64 [[VEC_START4]]
; PTR64-NEXT:    [[COL_LOAD6:%.*]] = load <3 x double>, ptr [[VEC_GEP5]], align 8
; PTR64-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD3]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR64-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD6]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR64-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR64-NEXT:    ret <9 x double> [[TMP2]]
;
; PTR32-LABEL: @strided_load_3x3_i64(
; PTR32-NEXT:  entry:
; PTR32-NEXT:    [[STRIDE_CAST:%.*]] = trunc i64 [[STRIDE:%.*]] to i32
; PTR32-NEXT:    [[VEC_START:%.*]] = mul i32 0, [[STRIDE_CAST]]
; PTR32-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN:%.*]], i32 [[VEC_START]]
; PTR32-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR32-NEXT:    [[VEC_START1:%.*]] = mul i32 1, [[STRIDE_CAST]]
; PTR32-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i32 [[VEC_START1]]
; PTR32-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR32-NEXT:    [[VEC_START4:%.*]] = mul i32 2, [[STRIDE_CAST]]
; PTR32-NEXT:    [[VEC_GEP5:%.*]] = getelementptr double, ptr [[IN]], i32 [[VEC_START4]]
; PTR32-NEXT:    [[COL_LOAD6:%.*]] = load <3 x double>, ptr [[VEC_GEP5]], align 8
; PTR32-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD3]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD6]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR32-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR32-NEXT:    ret <9 x double> [[TMP2]]
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64.i64(ptr %in, i64 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}

define <9 x double> @strided_load_3x3_const_stride_i64(ptr %in) {
; PTR64-LABEL: @strided_load_3x3_const_stride_i64(
; PTR64-NEXT:  entry:
; PTR64-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN:%.*]], align 8
; PTR64-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 16
; PTR64-NEXT:    [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR64-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i64 32
; PTR64-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR64-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR64-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD3]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR64-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR64-NEXT:    ret <9 x double> [[TMP2]]
;
; PTR32-LABEL: @strided_load_3x3_const_stride_i64(
; PTR32-NEXT:  entry:
; PTR32-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN:%.*]], align 8
; PTR32-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i32 16
; PTR32-NEXT:    [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR32-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i32 32
; PTR32-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR32-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD3]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR32-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR32-NEXT:    ret <9 x double> [[TMP2]]
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64.i64(ptr %in, i64 16, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}

define <9 x double> @strided_load_3x3_i32(ptr %in, i32 %stride) {
; PTR64-LABEL: @strided_load_3x3_i32(
; PTR64-NEXT:  entry:
; PTR64-NEXT:    [[STRIDE_CAST:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; PTR64-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE_CAST]]
; PTR64-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN:%.*]], i64 [[VEC_START]]
; PTR64-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR64-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE_CAST]]
; PTR64-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i64 [[VEC_START1]]
; PTR64-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR64-NEXT:    [[VEC_START4:%.*]] = mul i64 2, [[STRIDE_CAST]]
; PTR64-NEXT:    [[VEC_GEP5:%.*]] = getelementptr double, ptr [[IN]], i64 [[VEC_START4]]
; PTR64-NEXT:    [[COL_LOAD6:%.*]] = load <3 x double>, ptr [[VEC_GEP5]], align 8
; PTR64-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD3]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR64-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD6]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR64-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR64-NEXT:    ret <9 x double> [[TMP2]]
;
; PTR32-LABEL: @strided_load_3x3_i32(
; PTR32-NEXT:  entry:
; PTR32-NEXT:    [[VEC_START:%.*]] = mul i32 0, [[STRIDE:%.*]]
; PTR32-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN:%.*]], i32 [[VEC_START]]
; PTR32-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR32-NEXT:    [[VEC_START1:%.*]] = mul i32 1, [[STRIDE]]
; PTR32-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i32 [[VEC_START1]]
; PTR32-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR32-NEXT:    [[VEC_START4:%.*]] = mul i32 2, [[STRIDE]]
; PTR32-NEXT:    [[VEC_GEP5:%.*]] = getelementptr double, ptr [[IN]], i32 [[VEC_START4]]
; PTR32-NEXT:    [[COL_LOAD6:%.*]] = load <3 x double>, ptr [[VEC_GEP5]], align 8
; PTR32-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD3]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD6]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR32-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR32-NEXT:    ret <9 x double> [[TMP2]]
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr %in, i32 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}

define <9 x double> @strided_load_3x3_const_stride_i32(ptr %in) {
; PTR64-LABEL: @strided_load_3x3_const_stride_i32(
; PTR64-NEXT:  entry:
; PTR64-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN:%.*]], align 8
; PTR64-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 16
; PTR64-NEXT:    [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR64-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i64 32
; PTR64-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR64-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR64-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD3]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR64-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR64-NEXT:    ret <9 x double> [[TMP2]]
;
; PTR32-LABEL: @strided_load_3x3_const_stride_i32(
; PTR32-NEXT:  entry:
; PTR32-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN:%.*]], align 8
; PTR32-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i32 16
; PTR32-NEXT:    [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8
; PTR32-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN]], i32 32
; PTR32-NEXT:    [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8
; PTR32-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD1]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; PTR32-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD3]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison>
; PTR32-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; PTR32-NEXT:    ret <9 x double> [[TMP2]]
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr %in, i32 16, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}

declare <9 x double> @llvm.matrix.column.major.load.v9f64.i64(ptr, i64, i1, i32, i32)
declare <9 x double> @llvm.matrix.column.major.load.v9f64.i32(ptr, i32, i1, i32, i32)