aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir
blob: 6eb5b7bad166d78cca71fe69ad1e68e8570009c1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-pre-ra-optimizations -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX908 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes="amdgpu-pre-ra-optimizations" %s -o - | FileCheck -check-prefix=GFX908 %s

# VGPR source, propagated.
# Input: a VGPR (%13) is written into %11.sub0 with V_ACCVGPR_WRITE_B32_e64,
# then %11.sub1..sub3 are each filled by a COPY from %11.sub0, and the whole
# areg_128 feeds the MFMA accumulator operand.
# Expected: the three COPYs are rewritten to read the VGPR that was written
# into sub0, rather than reading the AGPR subregister %11.sub0.
---
name: test_mfma_f32_4x4x1f32_propagate_vgpr
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; GFX908-LABEL: name: test_mfma_f32_4x4x1f32_propagate_vgpr
    ; GFX908: liveins: $sgpr0_sgpr1
    ; GFX908-NEXT: {{  $}}
    ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
    ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
    ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec
    ; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B32_e32_1]], implicit $exec
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub1:areg_128 = COPY [[V_MOV_B32_e32_1]]
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub2:areg_128 = COPY [[V_MOV_B32_e32_1]]
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub3:areg_128 = COPY [[V_MOV_B32_e32_1]]
    ; GFX908-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
    ; GFX908-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
    ; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_3]], [[V_MOV_B32_e32_2]], [[V_ACCVGPR_WRITE_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
    ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_F32_4X4X1F32_e64_]]
    ; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; GFX908-NEXT: S_ENDPGM 0
    %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
    %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
    %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %13:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec
    undef %11.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 %13:vgpr_32, implicit $exec
    %11.sub1:areg_128 = COPY %11.sub0:areg_128
    %11.sub2:areg_128 = COPY %11.sub0:areg_128
    %11.sub3:areg_128 = COPY %11.sub0:areg_128
    %8:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
    %9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
    %10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %9:vgpr_32, %8:vgpr_32, %11:areg_128, 0, 0, 0, implicit $mode, implicit $exec
    %12:vreg_128 = COPY %10:areg_128
    GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    S_ENDPGM 0
...
# Immediate source, not propagated.
# Input: %11.sub0 is written by V_ACCVGPR_WRITE_B32_e64 from the immediate
# 1073741824 (not from a register), then %11.sub1..sub3 are each filled by a
# COPY from %11.sub0.
# Expected: the three COPYs are left unchanged and still read %11.sub0; the
# immediate is not forwarded into them.
---
name: test_mfma_f32_4x4x1f32_no_propagate_imm
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; GFX908-LABEL: name: test_mfma_f32_4x4x1f32_no_propagate_imm
    ; GFX908: liveins: $sgpr0_sgpr1
    ; GFX908-NEXT: {{  $}}
    ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
    ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
    ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    ; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub1:areg_128 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub2:areg_128 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub3:areg_128 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
    ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
    ; GFX908-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
    ; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_2]], [[V_MOV_B32_e32_1]], [[V_ACCVGPR_WRITE_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
    ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_F32_4X4X1F32_e64_]]
    ; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; GFX908-NEXT: S_ENDPGM 0
    %1:sgpr_64(p4) = COPY $sgpr0_sgpr1
    %4:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
    %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    undef %11.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec
    %11.sub1:areg_128 = COPY %11.sub0:areg_128
    %11.sub2:areg_128 = COPY %11.sub0:areg_128
    %11.sub3:areg_128 = COPY %11.sub0:areg_128
    %8:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
    %9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
    %10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %9:vgpr_32, %8:vgpr_32, %11:areg_128, 0, 0, 0, implicit $mode, implicit $exec
    %12:vreg_128 = COPY %10:areg_128
    GLOBAL_STORE_DWORDX4_SADDR %5:vgpr_32, %12:vreg_128, %4:sreg_64_xexec_xnull, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    S_ENDPGM 0
...
# VGPR subregister source, propagated.
# Input: %1.sub0 is written by V_ACCVGPR_WRITE_B32_e64 from %0.sub0, a
# subregister of a vreg_128, then %1.sub1..sub3 are each filled by a COPY
# from %1.sub0.
# Expected: the three COPYs are rewritten to read %0.sub0 directly, so the
# source subregister index is carried over to the rewritten operand.
---
name: test_vgpr_subreg_propagate
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX908-LABEL: name: test_vgpr_subreg_propagate
    ; GFX908: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX908-NEXT: {{  $}}
    ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
    ; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub1:areg_128 = COPY [[COPY]].sub0
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub2:areg_128 = COPY [[COPY]].sub0
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub3:areg_128 = COPY [[COPY]].sub0
    ; GFX908-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[V_ACCVGPR_WRITE_B32_e64_]]
    %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
    undef %1.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 %0.sub0, implicit $exec
    %1.sub1:areg_128 = COPY %1.sub0:areg_128
    %1.sub2:areg_128 = COPY %1.sub0:areg_128
    %1.sub3:areg_128 = COPY %1.sub0:areg_128
    S_ENDPGM 0, implicit %0, implicit %1
...
# Full-register AGPR copy, not propagated.
# Input: %1.sub0 and %1.sub1 are written by two separate
# V_ACCVGPR_WRITE_B32_e64 instructions from %0.sub0 and %0.sub1, then the
# whole areg_64 %1 is copied into %2 with no subregister index on the COPY
# source.
# Expected: the full-register COPY is left unchanged and still reads %1.
---
name: test_nonmatching_agpr_subreg_no_propagate
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1
    ; GFX908-LABEL: name: test_nonmatching_agpr_subreg_no_propagate
    ; GFX908: liveins: $vgpr0_vgpr1
    ; GFX908-NEXT: {{  $}}
    ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec
    ; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub1, implicit $exec
    ; GFX908-NEXT: [[COPY1:%[0-9]+]]:areg_64 = COPY [[V_ACCVGPR_WRITE_B32_e64_]]
    ; GFX908-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[V_ACCVGPR_WRITE_B32_e64_]], implicit [[COPY1]]
    %0:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec
    undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub0, implicit $exec
    %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub1, implicit $exec
    %2:areg_64 = COPY %1:areg_64
    S_ENDPGM 0, implicit %0, implicit %1, implicit %2
...
# AGPR subregister copied into a single 32-bit AGPR, propagated.
# Input: %1.sub0 and %1.sub1 are written by two separate
# V_ACCVGPR_WRITE_B32_e64 instructions from %0.sub0 and %0.sub1, then only
# %1.sub1 is copied into the agpr_32 register %2.
# Expected: the COPY is rewritten to read %0.sub1, the VGPR subregister that
# was written into %1.sub1 (not the one written into %1.sub0).
---
name: test_subreg_to_single_agpr_reg_propagate
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1
    ; GFX908-LABEL: name: test_subreg_to_single_agpr_reg_propagate
    ; GFX908: liveins: $vgpr0_vgpr1
    ; GFX908-NEXT: {{  $}}
    ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec
    ; GFX908-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec
    ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub1, implicit $exec
    ; GFX908-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]].sub1
    ; GFX908-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[V_ACCVGPR_WRITE_B32_e64_]], implicit [[COPY1]]
    %0:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec
    undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub0, implicit $exec
    %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub1, implicit $exec
    %2:agpr_32 = COPY %1.sub1:areg_64
    S_ENDPGM 0, implicit %0, implicit %1, implicit %2
...