Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
86 files changed, 4684 insertions, 5745 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
index ae683ec..366b1ed 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
@@ -137,10 +137,10 @@ body: |
   bb.0:
     ; Test that trunc(trunc) is combined to a single trunc
     ; CHECK-LABEL: name: trunc_trunc
-    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
     ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
-    %0:_(s64) = G_IMPLICIT_DEF
+    %0:_(s64) = COPY $sgpr0_sgpr1
     %1:_(s48) = G_TRUNC %0
     %2:_(s32) = G_TRUNC %1
     $vgpr0 = COPY %2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
index 8b19d7d..88cb1ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
@@ -345,36 +345,20 @@ body: |
     ; SI-LABEL: name: test_abs_v3s16
     ; SI: liveins: $vgpr0
     ; SI-NEXT: {{ $}}
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
-    ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]]
-    ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
-    ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG1]]
-    ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
-    ; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG2]]
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[C]]
+    ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[C]]
+    ; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[C]]
     ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32), [[ABS2]](s32)
     ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ;
     ; VI-LABEL: name: test_abs_v3s16
     ; VI: liveins: $vgpr0
     ; VI-NEXT: {{ $}}
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]]
-    ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC1]]
-    ; VI-NEXT: [[ABS2:%[0-9]+]]:_(s16) = G_ABS [[TRUNC2]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
+    ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
+    ; VI-NEXT: [[ABS2:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
     ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16)
     ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS1]](s16)
     ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS2]](s16)
@@ -384,18 +368,16 @@ body: |
     ; GFX9-LABEL: name: test_abs_v3s16
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{ $}}
-    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x s16>) = G_ABS [[BUILD_VECTOR]]
+    ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ABS]](<2 x s16>)
     ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x s16>) = G_ABS [[UV]]
-    ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]]
-    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ABS]](<2 x s16>)
-    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS1]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
-    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_ABS %0
     %2:_(<3 x s32>) = G_ANYEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index c734711..83ca323 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -373,22 +373,17 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_and_v5s32
-    ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
-    ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
     ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]]
-    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
-    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>)
-    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
-    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[AND2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[AND2]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
     %0:_(<5 x s32>) = G_IMPLICIT_DEF
     %1:_(<5 x s32>) = G_IMPLICIT_DEF
@@ -456,24 +451,12 @@ body: |
     ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
     ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
-    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
-    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
@@ -511,63 +494,62 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_and_v5s16
-    ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
-    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
-    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+    ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+    ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
-    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
-    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL4]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL5]]
+    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
     ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
-    ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
-    ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
-    ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL6]]
+    ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+    ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL7]]
+    ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
     ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND3]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
-    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
-    ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
-    ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
-    ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
-    ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
-    ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]]
-    ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
-    ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
-    ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
-    ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
-    ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>)
+    ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
+    ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
+    ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]]
+    ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL9]]
+    ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+    ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL10]]
+    ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+    ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL11]]
+    ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -583,13 +565,10 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_and_v3s8
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]]
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<3 x s8>) = G_IMPLICIT_DEF
@@ -605,14 +584,11 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_and_v4s8
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]]
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]]
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
@@ -628,18 +604,15 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_and_v8s8
-    ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<8 x s32>)
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV8]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV9]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV10]]
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV11]]
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV12]]
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV13]]
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV14]]
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV15]]
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
     %0:_(<8 x s8>) = G_IMPLICIT_DEF
@@ -655,28 +628,23 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_and_v16s8
-    ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV16]]
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV17]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV18]]
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV19]]
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV20]]
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV21]]
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV22]]
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV23]]
-    ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
-    ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>)
-    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[UV56]]
-    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[UV57]]
-    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[UV58]]
-    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[UV59]]
-    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV44]], [[UV60]]
-    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV45]], [[UV61]]
-    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV46]], [[UV62]]
-    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV47]], [[UV63]]
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+    ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32), [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32), [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
     %0:_(<16 x s8>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
index a585fa7..6ff6316 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -649,9 +649,8 @@ body: |
     ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
     ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
     ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
-    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[DEF]](s64)
     ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ;
     ; VI-LABEL: name: test_ashr_v3s64_v3s32
@@ -664,9 +663,8 @@ body: |
     ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
     ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
    ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
-    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[DEF]](s64)
     ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     ;
     ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32
@@ -679,9 +677,8 @@ body: |
     ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
     ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
     ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
-    ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
-    ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+    ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[DEF]](s64)
     ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
     %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     %1:_(<3 x s64>) = G_EXTRACT %0, 0
@@ -857,26 +854,20 @@ body: |
     ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
     ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
     ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32)
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
     ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
     ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
     ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
     ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
-    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]]
-    ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
     ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]]
-    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
-    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
-    ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; VI-LABEL: name: test_ashr_v3s16_v3s16
@@ -902,27 +893,20 @@ body: |
     ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC3]](s16)
     ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC4]](s16)
     ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16)
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
     ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16)
     ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16)
     ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
     ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16)
-    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
     ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
-    ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16
@@ -945,17 +929,10 @@ body: |
     ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
     ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
     ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9PLUS-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
-    ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
-    ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+    ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
     ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
-    ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR1]](s16), [[TRUNC4]](s16)
-    ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
+    ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR1]](s16), [[DEF]](s16)
+    ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
     ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
index 724d581..f7f7732 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -233,9 +233,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32
-    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
     ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(<2 x s8>) = G_IMPLICIT_DEF
     %1:_(s32) = G_CONSTANT i32 0
@@ -270,9 +269,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32
-    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
     ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
     %1:_(s32) = G_CONSTANT i32 0
@@ -288,9 +286,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1
-    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
     ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
     %1:_(s1) = G_CONSTANT i1 false
@@ -983,9 +980,8 @@ body: |
     ; CHECK-LABEL: name: extract_vector_elt_0_v8i64
     ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
     ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
     %0:_(<8 x s64>) = G_IMPLICIT_DEF
     %1:_(s32) = G_CONSTANT i32 0
@@ -1003,9 +999,8 @@ body: |
     ; CHECK-LABEL: name: extract_vector_elt_0_v16i64
     ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: {{ $}}
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
     ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
     %0:_(<16 x s64>) = G_IMPLICIT_DEF
     %1:_(s32) = G_CONSTANT i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
index 6630300..d981769 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -516,9 +516,8 @@ name: extract_s8_v4s8_offset0
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -530,9 +529,8 @@ name: extract_s8_v4s8_offset8
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset8
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 8
     %2:_(s32) = G_ANYEXT %1
@@ -544,9 +542,8 @@ name: extract_s8_v4s8_offset16
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset16
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -558,9 +555,8 @@ name: extract_s8_v4s8_offset24
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v4s8_offset24
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
     %0:_(<4 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 24
     %2:_(s32) = G_ANYEXT %1
@@ -573,9 +569,8 @@ name: extract_s8_v3s8_offset16
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v3s8_offset16
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
     %0:_(<3 x s8>) = G_IMPLICIT_DEF
     %1:_(s8) = G_EXTRACT %0, 16
     %2:_(s32) = G_ANYEXT %1
@@ -587,9 +582,8 @@ name: extract_s8_v5s1_offset4
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s8_v5s1_offset4
-    ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
     %0:_(<5 x s1>) = G_IMPLICIT_DEF
     %1:_(s1) = G_EXTRACT %0, 4
     %2:_(s32) = G_ANYEXT %1
@@ -601,9 +595,8 @@ name: extract_v2s16_v4s16_offset32
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
     %0:_(<4 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -614,9 +607,8 @@ name: extract_v2s16_v6s16_offset32
 body: |
   bb.0:
     ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32
-    ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
     %0:_(<6 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 32
     $vgpr0 = COPY %1
@@ -868,10 +860,8 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_s16_v3s16_offset0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(s16) = G_EXTRACT %0, 0
     %2:_(s32) = G_ANYEXT %1
@@ -957,9 +947,12 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
@@ -971,9 +964,12 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0
-    ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
     %0:_(<5 x s16>) = G_IMPLICIT_DEF
     %1:_(<2 x s16>) = G_EXTRACT %0, 0
     $vgpr0 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
index d21526a..571bd10 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -266,85 +266,78 @@ body: |
   bb.0:
     ; SI-LABEL: name: test_fabs_v3s16
-    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
-    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]]
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+    ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST]]
     ; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]]
     ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
-    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
     ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
-    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL1]]
     ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
     ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]]
     ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; VI-LABEL: name: test_fabs_v3s16
-    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
-    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
-    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]]
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+    ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST]]
     ; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]]
     ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
-    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
     ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
-    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
-    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL1]]
     ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
     ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]]
     ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
     ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX9-LABEL: name: test_fabs_v3s16
-    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+    ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR]]
+    ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR1]]
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
     ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
     ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
-    ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]]
-    ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR]]
-    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
-    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC1]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
-    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
     ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_FABS %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
index b209ef3..b48f566 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
@@ -425,27 +425,19 @@ body: |
     ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
     ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]]
     ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32)
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
     ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
     ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
     ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
     ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
     ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-    ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
-    ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+    ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; VI-LABEL: name: test_fadd_v3s16
@@ -471,27 +463,19 @@ body: |
     ; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]]
     ; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC4]]
     ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC5]]
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
     ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
     ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
     ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
     ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
-    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
     ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-    ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
-    ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+    ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
     ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX9-LABEL: name: test_fadd_v3s16
@@ -500,35 +484,21 @@ body: |
     ; GFX9: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
     ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
-    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
     ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x
s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV]], [[UV3]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV1]], [[UV4]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir index d8b0439..b95b696 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -18,12 +18,14 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI-NEXT: $vgpr0 
= COPY [[FCANONICALIZE]](s32) + ; ; VI-LABEL: name: test_fcanonicalize_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) + ; ; GFX9-LABEL: name: test_fcanonicalize_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -45,11 +47,13 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; ; VI-LABEL: name: test_fcanonicalize_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) + ; ; GFX9-LABEL: name: test_fcanonicalize_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -75,6 +79,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fcanonicalize_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -83,6 +88,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fcanonicalize_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -113,6 +119,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fcanonicalize_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -122,6 +129,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -152,6 +160,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32), [[FCANONICALIZE2]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fcanonicalize_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -162,6 +171,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32), [[FCANONICALIZE2]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -192,6 +202,7 @@ body: | ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV1]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCANONICALIZE]](s64), [[FCANONICALIZE1]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fcanonicalize_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -201,6 +212,7 @@ body: | ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV1]] ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCANONICALIZE]](s64), [[FCANONICALIZE1]](s64) ; VI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -242,6 +254,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; VI-LABEL: name: test_fcanonicalize_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -259,6 +272,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fcanonicalize_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -276,22 +290,14 @@ body: | bb.0: ; SI-LABEL: name: test_fcanonicalize_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]] ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]] ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]] ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) @@ -299,39 +305,30 @@ body: | ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fcanonicalize_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE 
[[TRUNC2]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[DEF]]
+ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[DEF]]
+ ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[DEF]]
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16)
 ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE1]](s16)
 ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE2]](s16)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
 ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_fcanonicalize_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE]](<2 x s16>)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
- ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
- ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
 %0:_(<3 x s16>) = G_IMPLICIT_DEF
 %1:_(<3 x s16>) = G_FCANONICALIZE %0
 %2:_(<3 x s32>) = G_ANYEXT %1
@@ -382,6 +379,7 @@ body: |
 ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_fcanonicalize_v4s16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -412,6 +410,7 @@ body: |
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_fcanonicalize_v4s16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
index c25db579..cef4cd7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
@@ -19,6 +19,7 @@ body: |
 ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]]
 ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
 ; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+ ;
 ; GFX8-LABEL: name: test_fcmp_s32
 ; GFX8: liveins: $vgpr0
 ; GFX8-NEXT: {{ $}}
@@ -27,6 +28,7 @@ body: |
 ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]]
 ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
 ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+ ;
 ; GFX9-LABEL: name: test_fcmp_s32
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -55,6 +57,7 @@ body: |
 ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]]
 ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; GFX8-LABEL: name: test_fcmp_s64
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -63,6 +66,7 @@ body: |
 ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]]
 ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
 ; GFX9-LABEL: name: test_fcmp_s64
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -95,6 +99,7 @@ body: |
 ; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C]], [[TRUNC]]
 ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
 ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX8-LABEL: name: test_fcmp_s16
 ; GFX8: liveins: $vgpr0
 ; GFX8-NEXT: {{ $}}
@@ -105,6 +110,7 @@ body: |
 ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C]], [[TRUNC]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
 ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_fcmp_s16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -141,6 +147,7 @@ body: |
 ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
 ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
 ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX8-LABEL: name: test_fcmp_v2s32
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -153,6 +160,7 @@ body: |
 ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
 ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_fcmp_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -190,6 +198,7 @@ body: |
 ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
 ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
 ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX8-LABEL: name: test_fcmp_v2s32_flags
 ; GFX8: liveins: $vgpr0_vgpr1
 ; GFX8-NEXT: {{ $}}
@@ -202,6 +211,7 @@ body: |
 ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
 ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
 ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_fcmp_v2s32_flags
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -231,43 +241,42 @@
; GFX7-LABEL: name: test_fcmp_v3s32 ; GFX7: liveins: $vgpr0_vgpr1_vgpr2 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]] - ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]] - ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]] + ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV]] + ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV1]] + ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV2]] ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX8-LABEL: name: test_fcmp_v3s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2 ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]] - ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]] - ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]] + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV1]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV2]] ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fcmp_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), 
[[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]] - ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]] - ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV1]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV2]] ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1) @@ -305,6 +314,7 @@ body: | ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1) ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX8-LABEL: name: test_fcmp_v4s32 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -323,6 +333,7 @@ body: | ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1) ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; ; GFX9-LABEL: name: test_fcmp_v4s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -383,6 +394,7 @@ body: | ; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX8-LABEL: name: test_icmp_v2s16 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -407,6 +419,7 @@ body: | ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]] ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_icmp_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir index a0c2994e..51a16b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -20,6 +20,7 @@ body: | ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; ; VI-LABEL: name: test_fcos_s32 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -29,6 +30,7 @@ body: | ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32) + ; ; GFX9-LABEL: name: test_fcos_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -57,6 +59,7 @@ body: | ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64) + ; ; VI-LABEL: name: test_fcos_s64 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -66,6 +69,7 @@ body: | ; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64) ; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64) + ; ; GFX9-LABEL: name: test_fcos_s64 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -97,6 +101,7 @@ body: | ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; VI-LABEL: name: test_fcos_s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -108,6 +113,7 @@ body: | ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; ; GFX9-LABEL: name: test_fcos_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -145,6 +151,7 @@ body: | ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; VI-LABEL: name: test_fcos_v2s32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -159,6 +166,7 @@ body: | ; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; ; GFX9-LABEL: name: test_fcos_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -199,6 +207,7 @@ body: | ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fcos_v3s32 ; VI: liveins: $vgpr0_vgpr1_vgpr2 ; VI-NEXT: {{ $}} @@ -216,6 +225,7 @@ body: | ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fcos_v3s32 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -255,6 +265,7 @@ body: | ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s64) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; VI-LABEL: name: test_fcos_v2s64 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; VI-NEXT: {{ $}} @@ -269,6 +280,7 @@ body: | ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s64) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; ; GFX9-LABEL: name: test_fcos_v2s64 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -318,6 +330,7 @@ body: | ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x 
s16>) + ; ; VI-LABEL: name: test_fcos_v2s16 ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -340,6 +353,7 @@ body: | ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; ; GFX9-LABEL: name: test_fcos_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -367,28 +381,20 @@ body: | bb.0: ; SI-LABEL: name: test_fcos_v3s16 - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 - ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]] + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32) ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32) ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]] + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32) ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32) ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]] + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C]] ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) @@ -397,24 +403,17 @@ body: | ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; VI-LABEL: name: test_fcos_v3s16 - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]] ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16) ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16) - ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]] ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16) ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s16) - ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]] ; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) @@ -422,22 +421,15 @@ body: | ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; ; GFX9-LABEL: name: test_fcos_v3s16 - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 - ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]] + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]] ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s16) - ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]] ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) - ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]] ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) @@ -503,6 +495,7 @@ body: | ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; ; 
VI-LABEL: name: test_fcos_v4s16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -542,6 +535,7 @@ body: |
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_fcos_v4s16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -589,6 +583,7 @@ body: |
 ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
 ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
 ; VI-LABEL: name: test_fcos_s32_flags
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -598,6 +593,7 @@ body: |
 ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
 ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
 ; GFX9-LABEL: name: test_fcos_s32_flags
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
index 3fa73c2..30b716a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
@@ -1975,31 +1975,15 @@ body: |
 ; SI-LABEL: name: test_fdiv_v3s16
 ; SI: liveins: $vgpr0, $vgpr1
 ; SI-NEXT: {{ $}}
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
 ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0
 ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1
 ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] @@ -2008,13 +1992,13 @@ body: | ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] @@ -2023,13 +2007,13 @@ body: | ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] @@ -2047,25 +2031,9 @@ body: | ; VI-LABEL: name: test_fdiv_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] @@ -2076,13 +2044,13 @@ body: | ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]] ; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]] ; VI-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]] + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]] ; VI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]] ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16) ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) ; VI-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] @@ -2093,12 +2061,12 @@ body: | ; VI-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]] ; VI-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]] ; VI-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C]] ; VI-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]] ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = 
G_FPEXT [[TRUNC2]](s16)
- ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
+ ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
  ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
  ; VI-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2109,10 +2077,10 @@ body: |
  ; VI-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]]
  ; VI-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]]
  ; VI-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]]
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C]]
  ; VI-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]]
  ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32)
- ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
+ ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
  ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
  ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
  ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -2122,25 +2090,9 @@ body: |
  ; GFX9-LABEL: name: test_fdiv_v3s16
  ; GFX9: liveins: $vgpr0, $vgpr1
  ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]]
  ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
  ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
@@ -2151,13 +2103,13 @@ body: |
  ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]]
  ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]]
  ; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]]
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
- ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]]
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
+ ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]]
  ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]]
  ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32)
- ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16)
- ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
+ ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]]
  ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32)
  ; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]]
@@ -2168,12 +2120,12 @@ body: |
  ; GFX9-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]]
  ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]]
  ; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]]
- ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]]
+ ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C]]
  ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]]
  ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32)
- ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
  ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
  ; GFX9-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2184,10 +2136,10 @@ body: |
  ; GFX9-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]]
  ; GFX9-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]]
  ; GFX9-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]]
- ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]]
+ ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C]]
  ; GFX9-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]]
  ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32)
- ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
+ ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
  ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
  ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -2197,25 +2149,9 @@ body: |
  ; GFX10-LABEL: name: test_fdiv_v3s16
  ; GFX10: liveins: $vgpr0, $vgpr1
  ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]]
  ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
  ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
@@ -2226,13 +2162,13 @@ body: |
  ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]]
  ; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]]
  ; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]]
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
- ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
+ ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]]
  ; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]]
  ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32)
- ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16)
- ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
+ ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]]
  ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32)
  ; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]]
@@ -2243,12 +2179,12 @@ body: |
  ; GFX10-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]]
  ; GFX10-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]]
  ; GFX10-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]]
- ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]]
+ ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C]]
  ; GFX10-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]]
  ; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32)
- ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16)
- ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
+ ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX10-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
  ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
  ; GFX10-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2259,10 +2195,10 @@ body: |
  ; GFX10-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]]
  ; GFX10-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]]
  ; GFX10-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]]
- ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]]
+ ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C]]
  ; GFX10-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]]
  ; GFX10-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32)
- ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
+ ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
  ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
  ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
  ; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -2272,12 +2208,9 @@ body: |
  ; GFX11-LABEL: name: test_fdiv_v3s16
  ; GFX11: liveins: $vgpr0, $vgpr1
  ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX11-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; GFX11-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX11-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX11-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]]
  ; GFX11-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
  ; GFX11-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
@@ -2289,9 +2222,9 @@ body: |
  ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL1]], [[C]]
  ; GFX11-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FMA1]]
  ; GFX11-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32)
- ; GFX11-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[UV4]](s16), [[UV]](s16)
- ; GFX11-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; GFX11-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
+ ; GFX11-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX11-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]]
  ; GFX11-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32)
  ; GFX11-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]]
@@ -2302,9 +2235,9 @@ body: |
  ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL3]], [[C]]
  ; GFX11-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FMA4]]
  ; GFX11-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32)
- ; GFX11-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[UV5]](s16), [[UV1]](s16)
- ; GFX11-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; GFX11-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
+ ; GFX11-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; GFX11-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
  ; GFX11-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
  ; GFX11-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2315,7 +2248,7 @@ body: |
  ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL5]], [[C]]
  ; GFX11-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FMA7]]
  ; GFX11-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32)
- ; GFX11-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[UV6]](s16), [[UV2]](s16)
+ ; GFX11-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
  ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
  ; GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
  ; GFX11-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
index 3289d60..9999c181 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
@@ -17,12 +17,14 @@ body: |
  ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]]
  ; SI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32)
+ ;
  ; VI-LABEL: name: test_ffloor_s32
  ; VI: liveins: $vgpr0
  ; VI-NEXT: {{ $}}
  ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]]
  ; VI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32)
+ ;
  ; GFX9-LABEL: name: test_ffloor_s32
  ; GFX9: liveins: $vgpr0
  ; GFX9-NEXT: {{ $}}
@@ -52,12 +54,14 @@ body: |
  ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]]
  ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]]
  ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ;
  ; VI-LABEL: name: test_ffloor_s64
  ; VI: liveins: $vgpr0
  ; VI-NEXT: {{ $}}
  ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
  ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]]
  ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ;
  ; GFX9-LABEL: name: test_ffloor_s64
  ; GFX9: liveins: $vgpr0
  ; GFX9-NEXT: {{ $}}
@@ -86,12 +90,14 @@ body: |
  ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]]
  ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]]
  ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ;
  ; VI-LABEL: name: test_ffloor_s64_nnan
  ; VI: liveins: $vgpr0
  ; VI-NEXT: {{ $}}
  ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
  ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]]
  ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ;
  ; GFX9-LABEL: name: test_ffloor_s64_nnan
  ; GFX9: liveins: $vgpr0
  ; GFX9-NEXT: {{ $}}
@@ -122,12 +128,14 @@ body: |
  ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]]
  ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]]
  ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ;
  ; VI-LABEL: name: test_ffloor_s64_nssaz
  ; VI: liveins: $vgpr0
  ; VI-NEXT: {{ $}}
  ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
  ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]]
  ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ;
  ; GFX9-LABEL: name: test_ffloor_s64_nssaz
  ; GFX9: liveins: $vgpr0
  ; GFX9-NEXT: {{ $}}
@@ -156,6 +164,7 @@ body: |
  ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32)
  ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
  ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
  ; VI-LABEL: name: test_ffloor_s16
  ; VI: liveins: $vgpr0
  ; VI-NEXT: {{ $}}
@@ -164,6 +173,7 @@ body: |
  ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
  ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
  ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
  ; GFX9-LABEL: name: test_ffloor_s16
  ; GFX9: liveins: $vgpr0
  ; GFX9-NEXT: {{ $}}
@@ -194,6 +204,7 @@ body: |
  ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]]
  ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32)
  ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
  ; VI-LABEL: name: test_ffloor_v2s32
  ; VI: liveins: $vgpr0_vgpr1
  ; VI-NEXT: {{ $}}
@@ -203,6 +214,7 @@ body: |
  ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]]
  ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32)
  ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
  ; GFX9-LABEL: name: test_ffloor_v2s32
  ; GFX9: liveins: $vgpr0_vgpr1
  ; GFX9-NEXT: {{ $}}
@@ -233,6 +245,7 @@ body: |
  ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]]
  ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
  ; VI-LABEL: name: test_ffloor_v3s32
  ; VI: liveins: $vgpr0_vgpr1_vgpr2
  ; VI-NEXT: {{ $}}
@@ -243,6 +256,7 @@ body: |
  ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]]
  ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
  ; GFX9-LABEL: name: test_ffloor_v3s32
  ; GFX9: liveins: $vgpr0_vgpr1_vgpr2
  ; GFX9-NEXT: {{ $}}
@@ -284,6 +298,7 @@ body: |
  ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]]
  ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
  ; VI-LABEL: name: test_ffloor_v2s64
  ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
  ; VI-NEXT: {{ $}}
@@ -293,6 +308,7 @@ body: |
  ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]]
  ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
  ; GFX9-LABEL: name: test_ffloor_v2s64
  ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
  ; GFX9-NEXT: {{ $}}
@@ -334,6 +350,7 @@ body: |
  ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
  ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
  ; VI-LABEL: name: test_ffloor_v2s16
  ; VI: liveins: $vgpr0
  ; VI-NEXT: {{ $}}
@@ -351,6 +368,7 @@ body: |
  ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
  ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
  ; GFX9-LABEL: name: test_ffloor_v2s16
  ; GFX9: liveins: $vgpr0
  ; GFX9-NEXT: {{ $}}
@@ -375,22 +393,14 @@ body: |
  bb.0:
  ; SI-LABEL: name: test_ffloor_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]]
  ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]]
  ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]]
  ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32)
  ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -398,37 +408,23 @@ body: |
  ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
  ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
  ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
  ; VI-LABEL: name: test_ffloor_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
- ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]]
- ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
  ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
  ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16)
  ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16)
  ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
  ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
  ; GFX9-LABEL: name: test_ffloor_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
- ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]]
- ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]]
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
  ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
  ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16)
  ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16)
@@ -484,6 +480,7 @@ body: |
  ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
  ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
  ; VI-LABEL: name: test_ffloor_v4s16
  ; VI: liveins: $vgpr0_vgpr1
  ; VI-NEXT: {{ $}}
@@ -514,6 +511,7 @@ body: |
  ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
  ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
  ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
  ; GFX9-LABEL: name: test_ffloor_v4s16
  ; GFX9: liveins: $vgpr0_vgpr1
  ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
index fd49c993..df2ce44 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
@@ -501,27 +501,19 @@ body: |
  ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16)
  ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]]
  ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
  ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
  ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
  ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; VI-LABEL: name: test_fma_v3s16
@@ -555,27 +547,19 @@ body: |
  ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]]
  ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]]
  ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
  ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16)
  ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16)
  ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; GFX9-LABEL: name: test_fma_v3s16
@@ -585,39 +569,22 @@ body: |
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
  ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV3]], [[UV6]]
- ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+ ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV1]], [[UV4]], [[UV7]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
  %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
index d977049..3fc2b1c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
@@ -503,27 +503,19 @@ body: |
  ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
  ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]]
  ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
  ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
  ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
  ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; VI-LABEL: name: test_fmaxnum_v3s16
@@ -555,27 +547,19 @@ body: |
  ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]]
  ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]]
  ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
  ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16)
  ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16)
  ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; GFX9-LABEL: name: test_fmaxnum_v3s16
@@ -584,39 +568,25 @@ body: |
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
  ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]]
  ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
  ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
  %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
index 32c353d..e2b3f35 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
@@ -503,27 +503,19 @@ body: |
  ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
  ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]]
  ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
  ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
  ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
  ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; VI-LABEL: name: test_fminnum_v3s16
@@ -555,27 +547,19 @@ body: |
  ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]]
  ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]]
  ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
  ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16)
  ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16)
  ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; GFX9-LABEL: name: test_fminnum_v3s16
@@ -584,39 +568,25 @@ body: |
  ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
  ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
  ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
  ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
  ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]]
  ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
  ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
  %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
index bcc38df..33eadb8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
@@ -425,27 +425,19 @@ body: |
  ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
  ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]]
  ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
  ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
  ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
  ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
  ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; VI-LABEL: name: test_fmul_v3s16
@@ -471,27 +463,19 @@ body: |
  ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]]
  ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC4]]
  ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
  ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16)
  ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16)
  ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
  ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
  ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
  ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
  ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  ;
  ; GFX9PLUS-LABEL: name: test_fmul_v3s16
@@ -500,35 +484,21 @@ body: |
  ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
  ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
  ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
  ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
  ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV]], [[UV3]]
- ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV1]], [[UV4]]
+ ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
  %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
  %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
index eede021..82bd14d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
@@ -264,65 +264,58 @@ body: |
  bb.0:
  ; SI-LABEL: name: test_fneg_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]]
+ ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+ ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST]]
  ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]]
  ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
  ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
  ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
  ;
  ; VI-LABEL: name: test_fneg_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]]
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+ ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST]]
  ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]]
  ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
  ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
  ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
  ;
  ; GFX9-LABEL: name: test_fneg_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
  ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]]
- ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
  ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>)
+ ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
  %0:_(<3 x s16>) = G_IMPLICIT_DEF
  %1:_(<3 x s16>) = G_FNEG %0
  %2:_(<3 x s32>) = G_ZEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
index bd9eef0..cc3cf5d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
@@ -105,21 +105,11 @@ body: |
  ; CHECK-LABEL: name: test_fpext_v4f16_to_v4f32
  ; CHECK: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32), [[FPEXT3]](s32)
  ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
  %0:_(<4 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
index b08f850..aab47bc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
@@ -395,13 +395,13 @@ body: |
  bb.0:
  ; CHECK-LABEL: name: test_freeze_v33s32
- ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]]
- ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32),
[[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<32 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32) - ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<33 x s32>) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(<33 x s32>) = G_FREEZE %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir index 2400362..4dfc9ed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -723,28 +723,22 @@ body: | ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) 
 ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
 ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
 ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32)
 ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
 ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL5]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ;
 ; VI-LABEL: name: test_fshl_v3s16_v3s16
 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -798,29 +792,22 @@ body: |
 ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16)
 ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL2]], [[LSHR8]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
 ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
 ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
 ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ;
 ; GFX9-LABEL: name: test_fshl_v3s16_v3s16
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -856,17 +843,9 @@ body: |
 ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL1]], [[LSHR3]]
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
 ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>)
 ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>)
 ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
index 5f610924..243ad1f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
@@ -679,28 +679,22 @@ body: |
 ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT5]](s32)
 ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
 ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
 ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32)
 ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL6]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
 ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL8]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ;
 ; VI-LABEL: name: test_fshr_v3s16_v3s16
 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -754,29 +748,22 @@ body: |
 ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[SHL4]], [[AND5]](s16)
 ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[AND4]](s16)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
 ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
 ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
 ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
 ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL8]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
 ;
 ; GFX9-LABEL: name: test_fshr_v3s16_v3s16
 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -812,17 +799,9 @@ body: |
 ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL3]], [[LSHR1]]
 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
 ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>)
 ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>)
 ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
index 846ad4a..9e5f6b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
@@ -20,6 +20,7 @@ body: |
 ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
 ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
 ; VI-LABEL: name: test_fsin_s32
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -29,6 +30,7 @@ body: |
 ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
 ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
 ; GFX9-LABEL: name: test_fsin_s32
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -57,6 +59,7 @@ body: |
 ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
 ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64)
+ ;
 ; VI-LABEL: name: test_fsin_s64
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -66,6 +69,7 @@ body: |
 ; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
 ; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64)
+ ;
 ; GFX9-LABEL: name: test_fsin_s64
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -97,6 +101,7 @@ body: |
 ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
 ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; VI-LABEL: name: test_fsin_s16
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -108,6 +113,7 @@ body: |
 ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
 ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
 ; GFX9-LABEL: name: test_fsin_s16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -145,6 +151,7 @@ body: |
 ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; VI-LABEL: name: test_fsin_v2s32
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -159,6 +166,7 @@ body: |
 ; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
 ; GFX9-LABEL: name: test_fsin_v2s32
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -199,6 +207,7 @@ body: |
 ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_fsin_v3s32
 ; VI: liveins: $vgpr0_vgpr1_vgpr2
 ; VI-NEXT: {{ $}}
@@ -216,6 +225,7 @@ body: |
 ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_fsin_v3s32
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX9-NEXT: {{ $}}
@@ -255,6 +265,7 @@ body: |
 ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s64)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; VI-LABEL: name: test_fsin_v2s64
 ; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
 ; VI-NEXT: {{ $}}
@@ -269,6 +280,7 @@ body: |
 ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s64)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
 ; GFX9-LABEL: name: test_fsin_v2s64
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX9-NEXT: {{ $}}
@@ -318,6 +330,7 @@ body: |
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
 ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; VI-LABEL: name: test_fsin_v2s16
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -340,6 +353,7 @@ body: |
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
 ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
 ; GFX9-LABEL: name: test_fsin_v2s16
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
@@ -367,28 +381,20 @@ body: |
 bb.0:
 ; SI-LABEL: name: test_fsin_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
- ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]]
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
+ ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]]
 ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
 ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
 ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]]
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]]
 ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32)
 ; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32)
 ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]]
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C]]
 ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32)
 ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32)
 ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32)
@@ -397,24 +403,17 @@ body: |
 ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
 ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
 ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; VI-LABEL: name: test_fsin_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
- ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+ ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
 ; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16)
 ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16)
- ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]]
+ ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
 ; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16)
 ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s16)
- ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
+ ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
 ; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16)
 ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16)
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
@@ -422,22 +421,15 @@ body: |
 ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
 ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
 ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
 ; GFX9-LABEL: name: test_fsin_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
- ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]]
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
 ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s16)
- ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
 ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16)
- ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
 ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16)
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
 ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
@@ -503,6 +495,7 @@ body: |
 ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; VI-LABEL: name: test_fsin_v4s16
 ; VI: liveins: $vgpr0_vgpr1
 ; VI-NEXT: {{ $}}
@@ -542,6 +535,7 @@ body: |
 ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
 ; GFX9-LABEL: name: test_fsin_v4s16
 ; GFX9: liveins: $vgpr0_vgpr1
 ; GFX9-NEXT: {{ $}}
@@ -589,6 +583,7 @@ body: |
 ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
 ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
 ; VI-LABEL: name: test_fsin_s32_flags
 ; VI: liveins: $vgpr0
 ; VI-NEXT: {{ $}}
@@ -598,6 +593,7 @@ body: |
 ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
 ; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
 ; GFX9-LABEL: name: test_fsin_s32_flags
 ; GFX9: liveins: $vgpr0
 ; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
index 4089962..0ed4865 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
@@ -440,22 +440,14 @@ body: |
 bb.0:
 ; SI-LABEL: name: test_fsqrt_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
 ; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](s32)
 ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
 ; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](s32)
 ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
 ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT2]](s32)
 ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT2]](s32)
 ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -465,18 +457,10 @@ body: |
 ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 ;
 ; VI-LABEL: name: test_fsqrt_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]]
- ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]]
- ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
 ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16)
 ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16)
 ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16)
@@ -484,18 +468,10 @@ body: |
 ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
 ;
 ; GFX9-LABEL: name: test_fsqrt_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]]
- ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]]
- ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]]
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16)
 ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16)
 ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
index 667d79f..b9702d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
@@ -478,27 +478,19 @@ body: |
 ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16)
 ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]]
 ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
 ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
 ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
 ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; VI-LABEL: name: test_fsub_v3s16
@@ -524,27 +516,19 @@ body: |
 ; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]]
 ; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]]
 ; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
 ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB]](s16)
 ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB1]](s16)
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; GFX9-LABEL: name: test_fsub_v3s16
@@ -570,17 +554,10 @@ body: |
 ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]]
 ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]]
 ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]]
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB]](s16), [[FSUB1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB2]](s16), [[TRUNC6]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
index 2d54d7c..2d5bf68 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
@@ -300,13 +300,12 @@ body: |
 ; GFX7-LABEL: name: test_icmp_v3s32
 ; GFX7: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
- ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
- ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV]]
+ ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV1]]
+ ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV2]]
 ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
 ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
@@ -320,13 +319,12 @@ body: |
 ; GFX8-LABEL: name: test_icmp_v3s32
 ; GFX8: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
- ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
- ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV]]
+ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV1]]
+ ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV2]]
 ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
 ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
@@ -340,13 +338,12 @@ body: |
 ; GFX9-LABEL: name: test_icmp_v3s32
 ; GFX9: liveins: $vgpr0_vgpr1_vgpr2
 ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
- ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
- ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV]]
+ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV1]]
+ ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV2]]
 ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
 ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
index 1e6f1e7..3f0161f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
@@ -7,13 +7,198 @@ name: test_implicit_def_s1025
 body: |
 bb.0:
 ; TAHITI-LABEL: name: test_implicit_def_s1025
- ; TAHITI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; TAHITI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024)
- ; TAHITI-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; TAHITI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; TAHITI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; TAHITI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; TAHITI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; TAHITI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; TAHITI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
+ ; TAHITI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; TAHITI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; TAHITI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
+ ; TAHITI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; TAHITI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; TAHITI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
+ ; TAHITI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; TAHITI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; TAHITI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
+ ; TAHITI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; TAHITI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+ ; TAHITI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
+ ; TAHITI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; TAHITI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+ ; TAHITI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
+ ; TAHITI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; TAHITI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; TAHITI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
+ ; TAHITI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; TAHITI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; TAHITI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
+ ; TAHITI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+ ; TAHITI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; TAHITI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
+ ; TAHITI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
+ ; TAHITI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; TAHITI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
+ ; TAHITI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+ ; TAHITI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; TAHITI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
+ ; TAHITI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+ ; TAHITI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; TAHITI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
+ ; TAHITI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
+ ; TAHITI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; TAHITI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
+ ; TAHITI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+ ; TAHITI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; TAHITI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
+ ; TAHITI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+ ; TAHITI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; TAHITI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
+ ; TAHITI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
+ ; TAHITI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; TAHITI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32)
+ ; TAHITI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+ ; TAHITI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+ ; TAHITI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
+ ; TAHITI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+ ; TAHITI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+ ; TAHITI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32)
+ ; TAHITI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
+ ; TAHITI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; TAHITI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32)
+ ; TAHITI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+ ; TAHITI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+ ; TAHITI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
+ ; TAHITI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+ ; TAHITI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+ ; TAHITI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
+ ; TAHITI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
+ ; TAHITI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+ ; TAHITI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
+ ; TAHITI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+ ; TAHITI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; TAHITI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
+ ; TAHITI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+ ; TAHITI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+ ; TAHITI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
+ ; TAHITI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
+ ; TAHITI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+ ; TAHITI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
+ ; TAHITI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
+ ; TAHITI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+ ; TAHITI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
+ ; TAHITI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
+ ; TAHITI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; TAHITI-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
+ ; TAHITI-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
+ ; TAHITI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+ ; TAHITI-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
+ ; TAHITI-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
+ ; TAHITI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+ ; TAHITI-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
+ ; TAHITI-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
+ ; TAHITI-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; TAHITI-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
+ ; TAHITI-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
+ ; TAHITI-NEXT: $vgpr0 = COPY [[OR30]](s32)
+ ;
 ; FIJI-LABEL: name: test_implicit_def_s1025
- ; FIJI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; FIJI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024)
- ; FIJI-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; FIJI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; FIJI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; FIJI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; FIJI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; FIJI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; FIJI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
+ ; FIJI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; FIJI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; FIJI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
+ ; FIJI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; FIJI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; FIJI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
+ ; FIJI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; FIJI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; FIJI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
+ ; FIJI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; FIJI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+ ; FIJI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
+ ; FIJI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; FIJI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+ ; FIJI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
+ ; FIJI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; FIJI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; FIJI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
+ ; FIJI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; FIJI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; FIJI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
+ ; FIJI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+ ; FIJI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; FIJI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
+ ; FIJI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
+ ; FIJI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; FIJI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
+ ; FIJI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+ ; FIJI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; FIJI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
+ ; FIJI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+ ; FIJI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; FIJI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
+ ; FIJI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
+ ; FIJI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; FIJI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
+ ; FIJI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+ ; FIJI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; FIJI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
+ ; FIJI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+ ; FIJI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; FIJI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
+ ; FIJI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
+ ; FIJI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; FIJI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32)
+ ; FIJI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+ ; FIJI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+ ; FIJI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
+ ; FIJI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+ ; FIJI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+ ; FIJI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32)
+ ; FIJI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
+ ; FIJI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; FIJI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32)
+ ; FIJI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+ ; FIJI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+ ; FIJI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
+ ; FIJI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+ ; FIJI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+ ; FIJI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
+ ; FIJI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
+ ; FIJI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+ ; FIJI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
+ ; FIJI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+ ; FIJI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; FIJI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
+ ; FIJI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+ ; FIJI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+ ; FIJI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
+ ; FIJI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
+ ; FIJI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+ ; FIJI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
+ ; FIJI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
+ ; FIJI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+ ; FIJI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
+ ; FIJI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
+ ; FIJI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; FIJI-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
+ ; FIJI-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
+ ; FIJI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+ ; FIJI-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
+ ; FIJI-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
+ ; FIJI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+ ; FIJI-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
+ ; FIJI-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
+ ; FIJI-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; FIJI-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
+ ; FIJI-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
+ ; FIJI-NEXT: $vgpr0 = COPY [[OR30]](s32)
 %0:_(s1025) = G_IMPLICIT_DEF
 %1:_(s32) = G_TRUNC %0
 $vgpr0 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
index 8113ebf..4e4ab1b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
@@ -150,9 +150,8 @@ body: |
 bb.0:
 ; CHECK-LABEL: name: test_implicit_def_s512
- ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s512), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
 %0:_(s512) = G_IMPLICIT_DEF
 %1:_(s32) = G_EXTRACT %0, 0
 $vgpr0 = COPY %1
@@ -164,9 +163,8 @@ body: |
 bb.0:
 ; CHECK-LABEL: name: test_implicit_def_s1024
- ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s1024), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
 %0:_(s1024) = G_IMPLICIT_DEF
 %1:_(s32) = G_EXTRACT %0, 0
 $vgpr0 = COPY %1
@@ -178,9 +176,8 @@ body: |
 bb.0:
 ; CHECK-LABEL: name: test_implicit_def_s1056
- ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ;
CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s1056) = G_IMPLICIT_DEF %1:_(s32) = G_TRUNC %0 $vgpr0 = COPY %1 @@ -317,12 +314,10 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0 %34:_(p1) = COPY $vgpr0_vgpr1 @@ -390,9 +385,8 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = G_ANYEXT %0 @@ -417,26 +411,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v3s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<4 x s16>) = G_IMPLICIT_DEF @@ -462,32 +445,21 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v5s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) - ; CHECK-NEXT: 
[[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[UV6]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF @@ -501,11 +473,21 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v6s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV3]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]] + ; CHECK-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index 1bc7cd0..9b9c6d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -122,9 +122,8 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[UV8]](s64), [[UV9]](s64), [[UV10]](s64), [[UV11]](s64), [[UV12]](s64), [[UV13]](s64), [[UV14]](s64), [[UV15]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<16 x s64>) = G_IMPLICIT_DEF @@ -144,9 +143,8 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 @@ -190,90 +188,86 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32 ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>), [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, 
addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>), [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 
x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>), [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (<4 x s32>) into unknown-address + 
128, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 64 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index d8d0f9b..495740be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -968,22 +968,13 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -1006,24 +997,15 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -1046,17 +1028,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -1078,25 +1057,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -1122,20 +1092,17 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -1156,24 +1123,15 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[UV1]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -1198,20 +1156,17 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir index 7426ff9..4ebf883 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -795,27 +795,19 @@ body: | ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT %25(s16) ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR]](s32) ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) - ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C6]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX8-LABEL: name: test_intrinsic_round_v3s16 @@ -862,27 +854,19 @@ body: | ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; GFX8-NEXT: %25:_(s16) = disjoint G_OR [[AND4]], [[AND5]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %25 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) - ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C6]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL1]] + ; GFX8-NEXT: 
[[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_intrinsic_round_v3s16 @@ -929,17 +913,10 @@ body: | ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; GFX9-NEXT: %25:_(s16) = disjoint G_OR [[AND4]], [[AND5]] ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %25 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll index 1da38f8..3c6cd49 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -4829,36 +4829,33 @@ define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x floa ; GFX9: bb.1.main_body: ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[DEF]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[DEF]](s32) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX10NSA-LABEL: name: getresinfo_dmask0 ; GFX10NSA: bb.1.main_body: ; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX10NSA-NEXT: {{ $}} - ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GFX10NSA-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10NSA-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GFX10NSA-NEXT: $vgpr1 = COPY [[DEF]](s32) + ; GFX10NSA-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GFX10NSA-NEXT: $vgpr3 = COPY [[DEF]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; ; GFX12-LABEL: name: getresinfo_dmask0 ; GFX12: bb.1.main_body: ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX12-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GFX12-NEXT: $vgpr1 = COPY [[DEF]](s32) + ; GFX12-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GFX12-NEXT: $vgpr3 = COPY [[DEF]](s32) ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %mip = extractelement <2 x i16> %coords, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index 8de5666..5403663 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -709,17 +709,14 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: $vgpr1 = COPY 
[[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; @@ -729,17 +726,14 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -928,10 +922,9 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: {{ $}} ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; ; PACKED-LABEL: name: image_load_v4f16_dmask_0000 @@ -940,10 +933,9 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, ; PACKED-NEXT: {{ $}} ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index c36b1bc..50ed411 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -279,10 +279,9 @@ define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x float> %tex @@ -350,11 +349,10 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x float> %tex @@ -453,12 +451,11 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc, ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32) + ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x float> %tex diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 9c28eb0..a5a76bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -3503,24 +3503,12 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD 
[[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align8 @@ -3529,24 +3517,12 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), 
[[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align8 @@ -3555,20 +3531,9 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 4) @@ -3594,28 +3559,22 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; 
CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align4 @@ -3629,28 +3588,22 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align4 @@ -3667,18 +3620,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 @@ -3705,28 +3650,22 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align2 @@ -3740,28 +3679,22 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; 
VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align2 @@ -3778,18 +3711,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 @@ -3830,28 +3755,22 @@ body: | ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_constant_v3s16_align1 @@ -3879,28 +3798,22 @@ body: | ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_constant_v3s16_align1 @@ -3931,18 +3844,10 @@ body: | ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 @@ -5286,33 +5191,21 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; ; GFX9-LABEL: name: test_load_constant_v3s64_align32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5335,9 +5228,8 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 
4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_load_constant_v3s64_align8 @@ -5349,9 +5241,8 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-LABEL: name: test_load_constant_v3s64_align8 @@ -5363,9 +5254,8 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4) @@ -5489,9 +5379,8 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; CI-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_load_constant_v3s64_align1 @@ -5603,9 +5492,8 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-LABEL: name: test_load_constant_v3s64_align1 @@ -5717,9 +5605,8 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 16ce48b..f1c9a77 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -6330,26 +6330,20 @@ body: | ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) ; 
CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_flat_v3s16_align8 @@ -6363,26 +6357,20 @@ body: | ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align8 @@ -6391,20 +6379,9 @@ body: | ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; 
GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s16_align8 @@ -6413,20 +6390,9 @@ body: | ; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX11PLUS-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
 ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; GFX12-LABEL: name: test_load_flat_v3s16_align8
@@ -6435,20 +6401,9 @@ body: |
 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
 ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
 ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
 ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s16_align8
@@ -6457,20 +6412,9 @@ body: |
 ; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
 ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
 ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
 ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s16_align8
@@ -6479,20 +6423,9 @@ body: |
 ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
 ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
 ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
 ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s16_align8
@@ -6501,20 +6434,9 @@ body: |
 ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
 ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
 ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
 ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(p0) = COPY $vgpr0_vgpr1
 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 0)
@@ -6540,26 +6462,20 @@ body: |
 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
 ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
 ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; VI-LABEL: name: test_load_flat_v3s16_align4
@@ -6573,26 +6489,20 @@ body: |
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
 ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align4
@@ -6609,18 +6519,10 @@ body: |
 ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
 ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6638,18 +6540,10 @@ body: |
 ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
 ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6667,18 +6561,10 @@ body: |
 ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6696,18 +6582,10 @@ body: |
 ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
 ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6725,18 +6603,10 @@ body: |
 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
 ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6754,18 +6624,10 @@ body: |
 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
 ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(p0) = COPY $vgpr0_vgpr1
@@ -6792,28 +6654,22 @@ body: |
 ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; VI-LABEL: name: test_load_flat_v3s16_align2
@@ -6827,28 +6683,22 @@ body: |
 ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align2
@@ -6865,18 +6715,10 @@ body: |
 ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
 ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6894,18 +6736,10 @@ body: |
 ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
 ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6923,18 +6757,10 @@ body: |
 ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6952,18 +6778,10 @@ body: |
 ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
 ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6981,18 +6799,10 @@ body: |
 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
 ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -7010,18 +6820,10 @@ body: |
 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
 ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(p0) = COPY $vgpr0_vgpr1
@@ -7062,28 +6864,22 @@ body: |
 ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
 ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; VI-LABEL: name: test_load_flat_v3s16_align1
@@ -7111,28 +6907,22 @@ body: |
 ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align1
@@ -7149,18 +6939,10 @@ body: |
 ; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1)
 ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -7178,18 +6960,10 @@ body: |
 ; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1)
 ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -7207,18 +6981,10 @@ body: |
 ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1)
 ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -7250,18 +7016,10 @@ body: |
 ; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
 ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -7293,18 +7051,10 @@ body: |
 ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
 ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -7336,18 +7086,10 @@ body: |
 ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
 ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(p0) = COPY $vgpr0_vgpr1
@@ -9927,9 +9669,8 @@ body: |
 ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
 ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
 ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; VI-LABEL: name: test_load_flat_v3s64_align32
@@ -9953,9 +9694,8 @@ body: |
 ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
 ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -9967,9 +9707,8 @@ body: |
 ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -9981,9 +9720,8 @@ body: |
 ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
 ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; GFX12-LABEL: name: test_load_flat_v3s64_align32
@@ -9995,9 +9733,8 @@ body: |
 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
 ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -10009,9 +9746,8 @@ body: |
 ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
 ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -10023,9 +9759,8 @@ body: |
 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
 ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align32
@@ -10037,9 +9772,8 @@ body: |
 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
 ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 %0:_(p0) = COPY $vgpr0_vgpr1
 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0)
@@ -10075,9 +9809,8 @@ body: |
 ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
 ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
 ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; VI-LABEL: name: test_load_flat_v3s64_align8
@@ -10101,9 +9834,8 @@ body: |
 ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
 ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
 ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10115,9 +9847,8 @@ body: |
 ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
 ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10129,9 +9860,8 @@ body: |
 ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
 ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; GFX12-LABEL: name: test_load_flat_v3s64_align8
@@ -10143,9 +9873,8 @@ body: |
 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
 ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10157,9 +9886,8 @@ body: |
 ; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
 ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
 ;
 ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10171,9 +9899,8 @@ body: |
 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
 ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
 ;
UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align8 @@ -10185,9 +9912,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0) @@ -10299,9 +10025,8 @@ body: | ; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) ; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_load_flat_v3s64_align1 @@ -10401,9 +10126,8 @@ body: | ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) ; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align1 @@ -10415,9 +10139,8 @@ body: | ; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), 
[[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align1 @@ -10429,9 +10152,8 @@ body: | ; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX12-LABEL: name: test_load_flat_v3s64_align1 @@ -10443,9 +10165,8 @@ body: | ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align1 @@ -10557,9 +10278,8 @@ body: | ; UNALIGNED_GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; UNALIGNED_GFX9PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; UNALIGNED_GFX9PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align1 @@ -10671,9 +10391,8 @@ body: | ; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align1 @@ -10785,9 +10504,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index 1b72ce5..a15c602 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -5266,24 +5266,12 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 @@ -5292,24 +5280,12 @@ body: | ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 @@ -5318,24 +5294,12 @@ body: | ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-MESA-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_global_v3s16_align8 @@ -5344,24 +5308,12 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 @@ -5370,20 +5322,9 @@ body: | ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8 @@ -5392,20 +5333,9 @@ body: | ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) - ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 1) @@ -5431,28 +5361,22 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align4 @@ -5466,28 +5390,22 @@ body: | ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-MESA-LABEL: name: 
test_load_global_v3s16_align4 @@ -5501,28 +5419,22 @@ body: | ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_global_v3s16_align4 @@ -5536,28 +5448,22 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x 
s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4 @@ -5574,18 +5480,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST1]](s32) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -5603,18 +5501,10 @@ body: | ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1) ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -5641,28 +5531,22 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 @@ -5676,28 +5560,22 @@ body: | ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 @@ -5711,28 +5589,22 @@ body: | ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; 
CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_global_v3s16_align2 @@ -5746,28 +5618,22 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), 
[[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
@@ -5784,18 +5650,10 @@ body: |
 ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
 ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -5813,18 +5671,10 @@ body: |
 ; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
 ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(p1) = COPY $vgpr0_vgpr1
@@ -5865,28 +5715,22 @@ body: |
 ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
 ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; CI-HSA-LABEL: name: test_load_global_v3s16_align1
@@ -5900,28 +5744,22 @@ body: |
 ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; CI-MESA-LABEL: name: test_load_global_v3s16_align1
@@ -5949,28 +5787,22 @@ body: |
 ; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
 ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; VI-LABEL: name: test_load_global_v3s16_align1
@@ -5998,28 +5830,22 @@ body: |
 ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
 ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
 ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
 ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1
@@ -6036,18 +5862,10 @@ body: |
 ; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
 ; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
 ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 ;
@@ -6079,18 +5897,10 @@ body: |
 ; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
 ; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
 ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
 ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
 %0:_(p1) = COPY $vgpr0_vgpr1
@@ -6557,20 +6367,9 @@ body: |
 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
 ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
 ;
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align16
 ; CI-HSA: liveins: $vgpr0_vgpr1
@@ -6579,20 +6378,9 @@ body: |
 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
 ;
 ; CI-MESA-LABEL: name: test_load_global_v5s16_align16
 ; CI-MESA: liveins: $vgpr0_vgpr1
@@ -6601,20 +6389,9 @@ body: |
 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
 ;
 ; VI-LABEL: name: test_load_global_v5s16_align16
 ; VI: liveins: $vgpr0_vgpr1
@@ -6623,20 +6400,9 @@ body: |
 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
 ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
 ;
 ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16
 ; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -6645,18 +6411,11 @@ body: |
 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
 ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>)
- ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
 ;
 ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16
 ; GFX9-MESA: liveins: $vgpr0_vgpr1
@@ -6665,18 +6424,11 @@ body: |
 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
 ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
 ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
 ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
 ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>)
- ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
 %0:_(p1) = COPY $vgpr0_vgpr1
 %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 16, addrspace 1)
 %2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -6703,19 +6455,16 @@ body: |
 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
 ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST]](<2 x s16>)
 ;
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align8
 ; CI-HSA: liveins: $vgpr0_vgpr1
@@ -6734,29 +6483,26 @@ body: |
 ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; CI-MESA-LABEL: name: test_load_global_v5s16_align8
 ; CI-MESA: liveins: $vgpr0_vgpr1
@@ -6775,29 +6521,26 @@ body: |
 ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; VI-LABEL: name: test_load_global_v5s16_align8
 ; VI: liveins: $vgpr0_vgpr1
@@ -6816,29 +6559,26 @@ body: |
 ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8
 ; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -6864,11 +6604,8 @@ body: |
 ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
 ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -6897,11 +6634,8 @@ body: |
 ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
 ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -6931,19 +6665,16 @@ body: |
 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
 ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
 ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
 ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
 ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST]](<2 x s16>)
 ;
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align4
 ; CI-HSA: liveins: $vgpr0_vgpr1
@@ -6962,29 +6693,26 @@ body: |
 ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; CI-MESA-LABEL: name: test_load_global_v5s16_align4
 ; CI-MESA: liveins: $vgpr0_vgpr1
@@ -7003,29 +6731,26 @@ body: |
 ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; VI-LABEL: name: test_load_global_v5s16_align4
 ; VI: liveins: $vgpr0_vgpr1
@@ -7044,29 +6769,26 @@ body: |
 ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4
 ; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -7092,11 +6814,8 @@ body: |
 ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
 ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7125,11 +6844,8 @@ body: |
 ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
 ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7167,29 +6883,26 @@ body: |
 ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align2
 ; CI-HSA: liveins: $vgpr0_vgpr1
@@ -7208,29 +6921,26 @@ body: |
 ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; CI-MESA-LABEL: name: test_load_global_v5s16_align2
 ; CI-MESA: liveins: $vgpr0_vgpr1
@@ -7249,29 +6959,26 @@ body: |
 ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; VI-LABEL: name: test_load_global_v5s16_align2
 ; VI: liveins: $vgpr0_vgpr1
@@ -7290,29 +6997,26 @@ body: |
 ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
 ; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -7338,11 +7042,8 @@ body: |
 ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
 ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7371,11 +7072,8 @@ body: |
 ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
 ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
 ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
 ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7435,29 +7133,26 @@ body: |
 ; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
 ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32)
 ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
 ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
 ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
 ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
 ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
 ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
+ ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
 ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; CI-HSA-LABEL: name: test_load_global_v5s16_align1
 ; CI-HSA: liveins: $vgpr0_vgpr1
@@ -7476,29 +7171,26 @@ body: |
 ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
 ; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
 ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
 ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
 ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
 ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
 ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
 ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
 ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; CI-MESA-LABEL: name: test_load_global_v5s16_align1
 ; CI-MESA: liveins: $vgpr0_vgpr1
@@ -7539,29 +7231,26 @@ body: |
 ; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
 ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32)
 ; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]]
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
 ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
 ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
 ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
 ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
 ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
 ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
 ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
 ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
+ ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
 ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
 ;
 ; VI-LABEL: name: test_load_global_v5s16_align1
 ; VI: liveins: $vgpr0_vgpr1
@@ -7602,29 +7291,26 @@ body: |
 ; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
 ; VI-NEXT:
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 @@ -7650,11 +7336,8 @@ body: | ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -7705,11 +7388,8 @@ body: | ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[OR4]](s32) ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) 
= G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -8311,21 +7991,10 @@ body: | ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align16 ; CI-HSA: liveins: $vgpr0_vgpr1 @@ -8334,21 +8003,10 @@ body: | ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; 
CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align16 ; CI-MESA: liveins: $vgpr0_vgpr1 @@ -8357,21 +8015,10 @@ body: | ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) ; ; VI-LABEL: name: test_load_global_v7s16_align16 ; VI: liveins: $vgpr0_vgpr1 @@ -8380,21 +8027,10 @@ body: | ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-HSA: liveins: $vgpr0_vgpr1 @@ -8403,20 +8039,13 @@ body: | ; GFX9-HSA-NEXT: 
[[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) - ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-HSA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) ; ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-MESA: liveins: $vgpr0_vgpr1 @@ -8425,20 +8054,13 @@ body: | ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>) ; 
GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>) - ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-MESA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 16, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF @@ -8480,35 +8102,32 @@ body: | ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align8 ; CI-HSA: liveins: $vgpr0_vgpr1 @@ -8533,35 +8152,32 @@ body: | ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-HSA-NEXT: 
[[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align8 ; CI-MESA: liveins: $vgpr0_vgpr1 @@ -8586,35 +8202,32 @@ body: | ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; CI-MESA-NEXT: 
[[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; VI-LABEL: name: test_load_global_v7s16_align8 ; VI: liveins: $vgpr0_vgpr1 @@ -8639,35 +8252,32 @@ body: | ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = 
G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-HSA: liveins: $vgpr0_vgpr1 @@ -8702,11 +8312,8 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -8745,11 +8352,8 @@ body: | ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -8795,35 +8399,32 @@ body: | ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align4 ; CI-HSA: liveins: $vgpr0_vgpr1 @@ -8848,35 +8449,32 @@ body: | ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from 
unknown-address + 12, align 4, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align4 ; CI-MESA: liveins: $vgpr0_vgpr1 @@ -8901,35 +8499,32 @@ body: | ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; CI-MESA-NEXT: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; VI-LABEL: name: test_load_global_v7s16_align4 ; VI: liveins: $vgpr0_vgpr1 @@ -8954,35 +8549,32 @@ body: | ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) 
= G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-HSA: liveins: $vgpr0_vgpr1 @@ -9017,11 +8609,8 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -9060,11 +8649,8 @@ body: | ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -9110,35 +8696,32 @@ body: | ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align2 ; CI-HSA: liveins: $vgpr0_vgpr1 @@ -9163,35 +8746,32 @@ body: | ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-HSA-NEXT: 
[[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align2 ; CI-MESA: liveins: $vgpr0_vgpr1 @@ -9216,35 +8796,32 @@ body: | ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; VI-LABEL: name: test_load_global_v7s16_align2 ; VI: liveins: $vgpr0_vgpr1 @@ -9269,35 +8846,32 @@ body: | ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[LOAD3]], [[C6]] ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-HSA: liveins: $vgpr0_vgpr1 @@ -9332,11 +8906,8 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -9375,11 +8946,8 @@ body: | ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - 
; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -9455,35 +9023,32 @@ body: | ; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) + ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C10]], [[C9]](s32) ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-HSA-LABEL: name: test_load_global_v7s16_align1 ; CI-HSA: liveins: $vgpr0_vgpr1 @@ -9508,35 +9073,32 @@ body: | ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], 
[[C5]](s64) ; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; CI-MESA-LABEL: name: test_load_global_v7s16_align1 ; CI-MESA: liveins: $vgpr0_vgpr1 @@ -9591,35 +9153,32 @@ body: | ; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; 
CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] - ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] - ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] - ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) + ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C10]], [[C9]](s32) ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] - ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; VI-LABEL: name: test_load_global_v7s16_align1 ; VI: liveins: $vgpr0_vgpr1 @@ -9674,35 +9233,32 @@ body: | ; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) 
= G_AND [[OR3]], [[C8]] ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) + ; VI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C10]], [[C9]](s32) ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) - ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>) ; ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-HSA: liveins: $vgpr0_vgpr1 @@ -9737,11 +9293,8 @@ body: | ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -9810,11 +9363,8 @@ body: | ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) 
- ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) ; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>) @@ -11471,66 +11021,42 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: liveins: $vgpr0_vgpr1 ; CI-HSA-NEXT: {{ $}} ; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: liveins: $vgpr0_vgpr1 ; CI-MESA-NEXT: {{ $}} ; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 
1) - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: liveins: $vgpr0_vgpr1 ; GFX9-HSA-NEXT: {{ $}} ; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: liveins: $vgpr0_vgpr1 ; GFX9-MESA-NEXT: {{ $}} ; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11553,9 +11079,8 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), 
[[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 @@ -11567,9 +11092,8 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 @@ -11581,9 +11105,8 @@ body: | ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_load_global_v3s64_align8 @@ -11595,9 +11118,8 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 @@ -11609,9 +11131,8 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 
x s64>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 @@ -11623,9 +11144,8 @@ body: | ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1) @@ -11749,9 +11269,8 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 @@ -11763,9 +11282,8 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; 
CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 @@ -11877,9 +11395,8 @@ body: | ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_load_global_v3s64_align1 @@ -11991,9 +11508,8 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 @@ -12005,9 +11521,8 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 @@ -12119,9 +11634,8 @@ body: | ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), 
[[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64) ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index ff43b07..a38d054 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -8972,24 +8972,12 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-LABEL: name: test_load_local_v3s16_align8 @@ -8998,24 +8986,12 @@ body: | ; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-DS128-LABEL: name: test_load_local_v3s16_align8 @@ -9024,24 +9000,12 @@ body: | ; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-DS128-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_local_v3s16_align8 @@ -9050,24 +9014,12 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_local_v3s16_align8 @@ -9076,20 +9028,9 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), 
[[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 @@ -9098,20 +9039,9 @@ body: | ; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX10-LABEL: name: test_load_local_v3s16_align8 @@ -9120,20 +9050,9 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 @@ -9142,20 +9061,9 @@ body: | ; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX11-LABEL: name: test_load_local_v3s16_align8 @@ -9164,20 +9072,9 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; 
GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 @@ -9186,20 +9083,9 @@ body: | ; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) 
%0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 3) @@ -9225,28 +9111,22 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-LABEL: name: test_load_local_v3s16_align2 @@ -9260,28 +9140,22 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-DS128-LABEL: name: test_load_local_v3s16_align2 @@ -9295,28 +9169,22 @@ body: | ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[LOAD]], [[C2]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_local_v3s16_align2 @@ -9330,28 +9198,22 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_local_v3s16_align2 @@ -9368,18 +9230,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9397,18 +9251,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9426,18 +9272,10 @@ body: | ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9455,18 +9293,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; 
GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9484,18 +9314,10 @@ body: | ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9513,18 +9335,10 @@ body: | ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = 
nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p3) = COPY $vgpr0 @@ -9565,28 +9379,22 @@ body: | ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) 
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-LABEL: name: test_load_local_v3s16_align1 @@ -9614,28 +9422,22 @@ body: | ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = 
G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-DS128-LABEL: name: test_load_local_v3s16_align1 @@ -9663,28 +9465,22 @@ body: | ; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 
x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_local_v3s16_align1 @@ -9712,28 +9508,22 @@ body: | ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_local_v3s16_align1 @@ -9764,18 +9554,10 @@ body: | ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9793,18 +9575,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9836,18 +9610,10 @@ body: | ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9865,18 +9631,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9908,18 +9666,10 @@ body: | ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[OR2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9937,18 +9687,10 @@ body: | ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p3) = COPY $vgpr0 @@ -14156,9 +13898,8 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; 
SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[DEF]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; CI-LABEL: name: test_load_local_v3s64_align32 @@ -14172,9 +13913,8 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[DEF]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 @@ -14186,9 +13926,8 @@ body: | ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_load_local_v3s64_align32 @@ -14200,9 +13939,8 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: 
[[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-LABEL: name: test_load_local_v3s64_align32 @@ -14214,9 +13952,8 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 @@ -14228,9 +13965,8 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX10-LABEL: name: test_load_local_v3s64_align32 @@ -14242,9 +13978,8 @@ body: | ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 @@ -14256,9 +13991,8 @@ body: | ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds 
G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX11-LABEL: name: test_load_local_v3s64_align32 @@ -14270,9 +14004,8 @@ body: | ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 @@ -14284,9 +14017,8 @@ body: | ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index 7498def..6f8aaab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -9605,26 +9605,20 @@ body: | ; SI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-LABEL: name: test_load_private_v3s16_align8 @@ -9638,26 +9632,20 @@ body: | ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CI-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]] + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_private_v3s16_align8 @@ -9671,26 +9659,20 @@ body: | ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: 
test_load_private_v3s16_align8 @@ -9707,17 +9689,10 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9735,17 +9710,10 @@ body: | ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9755,20 +9723,9 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[LOAD]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX12-LABEL: name: test_load_private_v3s16_align8 @@ -9777,20 +9734,9 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s16_align8 @@ -9807,17 +9753,10 @@ body: | ; UNALIGNED_GFX9-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -9835,17 +9774,10 @@ body: | ; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) ; @@ -9855,20 +9787,9 @@ body: | ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s16_align8 @@ -9877,20 +9798,9 @@ body: | ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5) @@ -9916,28 +9826,22 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) ; ; CI-LABEL: name: test_load_private_v3s16_align2 @@ -9951,28 +9855,22 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_private_v3s16_align2 @@ -9986,28 +9884,22 @@ body: | ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_private_v3s16_align2 @@ -10024,17 +9916,10 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10052,17 +9937,10 @@ body: | ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10080,18 +9958,10 @@ body: | ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10109,18 +9979,10 @@ body: | ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10138,18 +10000,10 @@ body: | ; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), 
[[TRUNC5]](s16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10167,18 +10021,10 @@ body: | ; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10196,18 +10042,10 @@ body: | ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; 
UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10225,18 +10063,10 @@ body: | ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p5) = COPY $vgpr0 @@ -10277,28 +10107,22 @@ body: | ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; CI-LABEL: name: test_load_private_v3s16_align1 @@ -10326,28 +10150,22 @@ body: | ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; 
CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_load_private_v3s16_align1 @@ -10375,28 +10193,22 @@ body: | ; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]] + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR4]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_load_private_v3s16_align1 @@ -10413,17 +10225,10 @@ body: | ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10441,17 +10246,10 @@ body: | ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), 
[[TRUNC2]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16) - ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10469,18 +10267,10 @@ body: | ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10498,18 +10288,10 @@ body: | ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), 
[[TRUNC1]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10541,18 +10323,10 @@ body: | ; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10584,18 +10358,10 @@ body: | ; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; 
UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10627,18 +10393,10 @@ body: | ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; @@ -10670,18 +10428,10 @@ body: | ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) - ; 
UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p5) = COPY $vgpr0 @@ -15927,9 +15677,8 @@ body: | ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; CI-LABEL: name: test_load_private_v3s64_align32 @@ -15953,9 +15702,8 @@ body: | ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_load_private_v3s64_align32 @@ -15979,9 +15727,8 @@ body: | ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), 
[[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-LABEL: name: test_load_private_v3s64_align32 @@ -16005,9 +15752,8 @@ body: | ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX10-LABEL: name: test_load_private_v3s64_align32 @@ -16031,9 +15777,8 @@ body: | ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX11-LABEL: name: test_load_private_v3s64_align32 @@ -16045,9 +15790,8 @@ body: | ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX12-LABEL: name: test_load_private_v3s64_align32 @@ -16059,9 +15803,8 @@ body: | ; 
GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s64_align32 @@ -16085,9 +15828,8 @@ body: | ; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; UNALIGNED_GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s64_align32 @@ -16111,9 +15853,8 @@ body: | ; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; UNALIGNED_GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64) ; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s64_align32 @@ -16125,9 +15866,8 @@ body: | ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x 
s64>) - ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s64_align32 @@ -16139,9 +15879,8 @@ body: | ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32) ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index 7029cad..4546f30 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -509,9 +509,8 @@ body: | ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[DEF]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_lshr_v3s64_v3s32 @@ -524,9 +523,8 @@ body: | ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR 
[[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-LABEL: name: test_lshr_v3s64_v3s32 @@ -539,9 +537,8 @@ body: | ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[DEF]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 @@ -715,20 +712,17 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; VI-LABEL: name: test_lshr_v3s16_v3s16 @@ -754,20 +748,17 @@ body: | ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - 
; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 @@ -790,13 +781,9 @@ body: | ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -836,26 +823,20 @@ body: | ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = 
G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_ashr_v3s16_v3s16 @@ -881,27 +862,20 @@ body: | ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS 
[[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 @@ -924,17 +898,10 @@ body: | ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index ef7759f..c9a3060 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -376,22 +376,17 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v5s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR 
[[BUILD_VECTOR]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV4]], [[UV9]] - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[OR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[OR2]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF @@ -459,24 +454,12 @@ body: | ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: 
[[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 @@ -514,63 +497,62 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v5s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) 
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR5]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) - ; 
CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR9]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF @@ -586,13 +568,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF @@ -608,14 +587,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v4s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[DEF]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UV7]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index f1c4994..12e2cb9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -165,24 +165,12 @@ body: | ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1 ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C7]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C7]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -522,7 +510,8 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; 
CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] @@ -532,29 +521,27 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV18]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV31]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD 
[[DEF]], [[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>) bb.0: successors: %bb.1, %bb.2 @@ -589,7 +576,8 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] @@ -599,45 +587,43 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] - ; 
CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] - ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] - ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] - ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] - ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] - ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] - ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] - ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] - ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] - ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] - ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] - ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] - ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] - ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] - ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] - ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] - ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: 
[[ADD28:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF1]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>) bb.0: successors: %bb.1, %bb.2 @@ -672,86 +658,81 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), 
[[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32), [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32), [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32), [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32), [[UV128:%[0-9]+]]:_(s32), [[UV129:%[0-9]+]]:_(s32), [[UV130:%[0-9]+]]:_(s32), [[UV131:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] - ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] - ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] - ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] - ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] - ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] - ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV25]], 
[[UV89]] - ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] - ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] - ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] - ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] - ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] - ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] - ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] - ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] - ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] - ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] - ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] - ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] - ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] - ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] - ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] - ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] - ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] - ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] - ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] - ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] - ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] - ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] - ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] - ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] - ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] - ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] - ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] - ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] - ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] - ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] - ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] - ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] - ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] - ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] - ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] - ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] - ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] - ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] - ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV64]], [[UV128]] - ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[UV129]] - ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV66]], [[UV130]] - ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV67]], [[UV131]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; 
CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]] + ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[DEF]], 
[[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) @@ -759,10 +740,10 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) bb.0: @@ -946,10 +927,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[BUILD_VECTOR]](<3 x s64>), %bb.0, [[BUILD_VECTOR1]](<3 x s64>), %bb.1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64), [[UV19:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s64), [[UV21:%[0-9]+]]:_(s64), [[UV22:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV20]](s64), [[UV21]](s64), [[UV22]](s64), [[UV19]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV16]](s64), [[UV17]](s64), [[UV18]](s64), [[DEF]](s64) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x s64>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir index 
16ad07e..07f20dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -242,32 +242,25 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 4c0f4e6..832c80f 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -506,27 +506,19 @@ body: | ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX8-LABEL: name: saddsat_v3s16 @@ -571,27 +563,19 @@ body: | ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]] ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT 
[[ADD2]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: saddsat_v3s16 @@ -600,38 +584,28 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV1]], [[BUILD_VECTOR1]] ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir index 12bcecd..be57687 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -404,24 +404,11 @@ body: | ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 @@ -1003,19 +990,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x p0>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x p0>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0), [[UV2:%[0-9]+]]:_(p0), [[UV3:%[0-9]+]]:_(p0), [[UV4:%[0-9]+]]:_(p0), [[UV5:%[0-9]+]]:_(p0), [[UV6:%[0-9]+]]:_(p0), [[UV7:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[DEF]](<8 x p0>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(p0), [[UV9:%[0-9]+]]:_(p0), [[UV10:%[0-9]+]]:_(p0), [[UV11:%[0-9]+]]:_(p0), [[UV12:%[0-9]+]]:_(p0), [[UV13:%[0-9]+]]:_(p0), [[UV14:%[0-9]+]]:_(p0), [[UV15:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[DEF1]](<8 x p0>) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV]], [[UV8]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV9]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV10]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV11]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV12]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV5]], [[UV13]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV6]], [[UV14]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV7]], [[UV15]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] + ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0), [[SELECT2]](p0), [[SELECT3]](p0), [[SELECT4]](p0), [[SELECT5]](p0), [[SELECT6]](p0), [[SELECT7]](p0) ; CHECK-NEXT: S_NOP 0, implicit 
[[BUILD_VECTOR]](<8 x p0>) %0:_(s32) = G_CONSTANT i32 0 @@ -1345,27 +1329,20 @@ body: | ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC3]] ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC4]] ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC5]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir index 2e764da..b307891 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -924,37 +924,26 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), 
[[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR2]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: 
[[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX8-LABEL: name: test_sext_inreg_v3s16_1 @@ -969,27 +958,21 @@ body: | ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX6-LABEL: name: test_sext_inreg_v3s16_1 @@ -1004,27 +987,21 @@ body: | ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], 
[[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -1210,89 +1187,74 @@ body: | bb.0: ; GFX9-LABEL: name: test_sext_inreg_v6s16_1 - ; GFX9: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[DEF]], [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[DEF]], [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV2]], [[BUILD_VECTOR2]](<2 x s16>) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[DEF]], [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x 
s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>) ; GFX9-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX8-LABEL: name: test_sext_inreg_v6s16_1 - ; GFX8: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 - ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 - ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 - ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1 - ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 1 - ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C1]] - ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]] + ; GFX8-NEXT: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; GFX8-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX6-LABEL: name: test_sext_inreg_v6s16_1 - ; GFX6: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1 - ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1 - ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1 - ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1 - ; GFX6-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 1 - ; GFX6-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]] - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]] - ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX6-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX6-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]] - ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[AND3]], [[C1]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C1]] - ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) ; GFX6-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<6 x s16>) = G_SEXT_INREG %0, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index ba66106..15399ce 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -502,9 +502,8 @@ body: | ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[DEF]](s64) ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; VI-LABEL: name: test_shl_v3s64_v3s32 @@ -517,9 +516,8 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[DEF]](s64) ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; GFX9-LABEL: name: test_shl_v3s64_v3s32 @@ -532,9 +530,8 @@ body: | ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) - ; GFX9-NEXT: 
[[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[DEF]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 @@ -703,26 +700,20 @@ body: | ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]] ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]] ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL5]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; VI-LABEL: name: test_shl_v3s16_v3s16 @@ -748,27 +739,20 @@ body: | ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV6]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL5]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: test_shl_v3s16_v3s16 @@ -791,17 +775,10 @@ body: | ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[TRUNC4]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index 423ce82..d5947c8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -90,9 +90,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -328,3 +326,30 @@ body: | $vgpr0_vgpr1 = COPY %4 ... + +--- +name: shufflevector_cache_element +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: shufflevector_cache_element + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY2]](s32), [[COPY2]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY3]](s32), [[COPY3]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<8 x s32>), [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1 + %3:_(<8 x s32>) = G_SHUFFLE_VECTOR %2, %2, shufflemask(0, 0, 0, 0, 3, 3, 3, 3) + %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<8 x s32>), %1(s32) + $vgpr0 = COPY %4 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir index 10f425c..dff8111 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -237,16 +237,13 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_0 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -257,10 +254,7 @@ body: | ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 @@ -285,12 +279,10 @@ body: | ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_1 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -301,10 +293,7 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 @@ -329,16 +318,13 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_2 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -349,10 +335,7 @@ body: | ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 @@ -641,12 +624,10 @@ body: | ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_3 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -657,10 +638,7 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 @@ -733,16 +711,13 @@ body: | ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_0 ; GFX9: liveins: $vgpr0, $vgpr1 @@ -753,10 +728,7 @@ body: | ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index 45714fd9..b5ec2ac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -474,52 +474,20 @@ body: | ; SI-LABEL: name: test_smax_v3s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG 
[[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[C]], [[C]] + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[C]], [[C]] + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[C]], [[C]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; ; VI-LABEL: name: test_smax_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) @@ -529,42 +497,32 @@ body: | ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; 
GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) ; ; GFX1250-LABEL: name: test_smax_v3s16 ; GFX1250: liveins: $vgpr0, $vgpr1 ; GFX1250-NEXT: {{ $}} - ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX1250-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX1250-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]] + ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) ; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX1250-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] - ; GFX1250-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) - ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32) + ; 
GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index 88fe5d0..3eaecf6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -474,52 +474,20 @@ body: | ; SI-LABEL: name: test_smin_v3s16 ; SI: liveins: $vgpr0, $vgpr1 ; SI-NEXT: {{ $}} - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[C]], [[C]] + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[C]], [[C]] + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[C]], [[C]] ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; ; VI-LABEL: name: test_smin_v3s16 ; VI: liveins: $vgpr0, $vgpr1 ; VI-NEXT: {{ $}} - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; 
VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] - ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] - ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]] ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) @@ -529,42 +497,32 @@ body: | ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) ; ; GFX1250-LABEL: name: test_smin_v3s16 ; GFX1250: liveins: $vgpr0, $vgpr1 ; GFX1250-NEXT: {{ $}} - ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX1250-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX1250-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]] + ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) ; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX1250-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] - ; GFX1250-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) - ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32) - ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32) + ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 07d8df2..de50fce 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -551,26 +551,18 @@ body: | ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C4]] - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] - ; GFX6-NEXT: 
[[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX8-LABEL: name: sshlsat_v3s16 @@ -612,27 +604,19 @@ body: | ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: sshlsat_v3s16 @@ -674,17 +658,10 @@ body: | ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), 
[[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT1]](s16), [[SELECT3]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT5]](s16), [[TRUNC6]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT5]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir index aa59de0..d28edb0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -242,32 +242,25 @@ body: | ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: 
[[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 67c715f..dc49872 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -506,27 +506,20 @@ body: | ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX6-NEXT: 
[[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX8-LABEL: name: ssubsat_v3s16 @@ -571,27 +564,19 @@ body: | ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]] ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; ; GFX9-LABEL: name: ssubsat_v3s16 @@ -600,38 +585,28 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV2]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) - ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[BUILD_VECTOR1]] - ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[BUILD_VECTOR]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV1]], [[BUILD_VECTOR1]] ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY 
$vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index acbcb098..00440ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -6744,93 +6744,120 @@ body: |
     ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     ; SI-NEXT: {{ $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
-    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
-    ; SI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; SI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+    ; SI-NEXT: G_STORE [[BITCAST4]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+    ; SI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+    ; SI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     ;
     ; CI-LABEL: name: test_store_global_v11s16_align4
     ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     ; CI-NEXT: {{ $}}
     ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
-    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
-    ; CI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+    ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+    ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+    ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+    ; CI-NEXT: G_STORE [[BITCAST4]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+    ; CI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+    ; CI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     ;
     ; VI-LABEL: name: test_store_global_v11s16_align4
     ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
-    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
-    ; VI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; VI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+    ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+    ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+    ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+    ; VI-NEXT: G_STORE [[BITCAST4]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+    ; VI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+    ; VI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     ;
     ; GFX9-LABEL: name: test_store_global_v11s16_align4
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
     ; GFX9-NEXT: {{ $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
-    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
-    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
-    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
-    ; GFX9-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
-    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; GFX9-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
-    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
-    ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
-    ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
-    ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+    ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+    ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: G_STORE [[DEF1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+    ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+    ; GFX9-NEXT: G_STORE [[DEF1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+    ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+    ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
+    ; GFX9-NEXT: G_STORE [[DEF1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<11 x s16>) = G_IMPLICIT_DEF
     G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index 7fd2319..ca3abaa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -650,15 +650,14 @@ body: |
     ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
    ; SI-NEXT: {{ $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
     ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
     ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
     ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
     ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
     ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
@@ -675,12 +674,11 @@ body: |
     ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
     ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
     ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
@@ -708,14 +706,13 @@ body: |
     ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; SI-NEXT: {{ $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
     ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
     ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
     ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
@@ -726,12 +723,11 @@ body: |
     ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
     ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
     ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
@@ -754,14 +750,13 @@ body: |
     ; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; SI-NEXT: {{ $}}
     ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
     ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
     ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
     ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
     ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
     ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
@@ -772,12 +767,11 @@ body: |
     ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; VI-NEXT: {{ $}}
     ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
     ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
-    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
     ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
     ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
     ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir
index b2fe9b8..f07c078 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir
@@ -199,31 +199,24 @@ body: |
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
     ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32)
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
-    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32)
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
     ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]]
-    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
-    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
-    ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
-    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
-    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32)
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND7]](s32), [[AND8]](s32), [[AND9]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
index 87ab135..e454798 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
@@ -387,24 +387,16 @@ body: |
     ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]]
     ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]]
     ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32)
-    ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
     ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
-    ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
-    ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
     ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]]
-    ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
-    ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
-    ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL7]]
+    ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
     ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX8-LABEL: name: uaddsat_v3s16
@@ -428,27 +420,19 @@ body: |
     ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]]
     ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]]
     ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]]
-    ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16)
     ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16)
     ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
     ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-    ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16)
-    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
     ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-    ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
-    ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+    ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
     ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX9-LABEL: name: uaddsat_v3s16
@@ -457,38 +441,28 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
     ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
     ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
     ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
     ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16)
-    ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[BUILD_VECTOR1]]
-    ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[BUILD_VECTOR]]
+    ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV1]], [[BUILD_VECTOR1]]
     ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
     ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
     ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
-    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
-    ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
-    ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16)
-    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
index 32b526e..f89425f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -479,51 +479,20 @@ body: |
     ; SI-LABEL: name: test_umax_v3s16
     ; SI: liveins: $vgpr0, $vgpr1
     ; SI-NEXT: {{ $}}
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
-    ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]]
-    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]]
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[C]], [[C]]
+    ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[C]], [[C]]
+    ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[C]], [[C]]
     ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
     ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ;
     ; VI-LABEL: name: test_umax_v3s16
     ; VI: liveins: $vgpr0, $vgpr1
     ; VI-NEXT: {{ $}}
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
-    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]]
-    ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]]
-    ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+    ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+    ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
     ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
     ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16)
     ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16)
@@ -533,42 +502,32 @@ body: |
     ; GFX9-LABEL: name: test_umax_v3s16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{ $}}
-    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
     ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]]
-    ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
-    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
-    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
-    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
     ;
     ; GFX1250-LABEL: name: test_umax_v3s16
     ; GFX1250: liveins: $vgpr0, $vgpr1
     ; GFX1250-NEXT: {{ $}}
-    ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX1250-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; GFX1250-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+    ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
     ; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; GFX1250-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]]
-    ; GFX1250-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
-    ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
-    ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16)
-    ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
-    ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+    ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_IMPLICIT_DEF
     %2:_(<3 x s16>) = G_UMAX %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
index 8666c29..e3dd7d4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -479,51 +479,20 @@ body: |
     ; SI-LABEL: name: test_umin_v3s16
     ; SI: liveins: $vgpr0, $vgpr1
     ; SI-NEXT: {{ $}}
-    ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
-    ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
-    ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]]
-    ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
-    ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]]
+    ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[C]], [[C]]
+    ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C]], [[C]]
+    ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[C]], [[C]]
     ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
     ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
     ;
     ; VI-LABEL: name: test_umin_v3s16
     ; VI: liveins: $vgpr0, $vgpr1
     ; VI-NEXT: {{ $}}
-    ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
-    ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
-    ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
-    ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]]
-    ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]]
-    ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]]
+    ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+    ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+    ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
     ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
     ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16)
     ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16)
@@ -533,42 +502,32 @@ body: |
     ; GFX9-LABEL: name: test_umin_v3s16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{ $}}
-    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
     ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]]
-    ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
-    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
-    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
-    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
     ;
     ; GFX1250-LABEL: name: test_umin_v3s16
     ; GFX1250: liveins: $vgpr0, $vgpr1
     ; GFX1250-NEXT: {{ $}}
-    ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+    ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX1250-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; GFX1250-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+    ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
     ; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
-    ; GFX1250-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]]
-    ; GFX1250-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
-    ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
-    ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+    ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16)
-    ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
-    ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+    ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(<3 x s16>) = G_IMPLICIT_DEF
     %2:_(<3 x s16>) = G_UMIN %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
index ee57b72..16ba8c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
@@ -101,15 +101,10 @@ name: test_unmerge_s16_v3s16
 body: |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_s16_v3s16
-    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
-    ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
-    ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[DEF]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[DEF]](s32)
     %0:_(<3 x s16>) = G_IMPLICIT_DEF
     %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
     %4:_(s32) = G_ANYEXT %1
@@ -869,10 +864,9 @@ name: test_unmerge_v2s1
 body: |
   bb.0:
     ; CHECK-LABEL: name: test_unmerge_v2s1
-    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[DEF]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[DEF]](s32)
     ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1)
     ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC1]](s1)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
index c6e3108..7dd17b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
@@ -462,23 +462,16 @@ body: |
     ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]]
     ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL5]]
     ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32)
-    ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[C]](s32)
     ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL6]]
-    ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
-    ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32)
     ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR8]], [[SHL7]]
-    ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
-    ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-    ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL8]]
-    ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL7]]
+    ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
     ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX8-LABEL: name: ushlsat_v3s16
@@ -512,27 +505,19 @@ body: |
     ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16)
     ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]]
     ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]]
-    ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16)
     ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
     ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
     ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
-    ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16)
-    ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
-    ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
     ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
-    ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
-    ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]]
-    ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL4]]
+    ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
     ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX9-LABEL: name: ushlsat_v3s16
@@ -566,17 +551,10 @@ body: |
     ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16)
     ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]]
     ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]]
-    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
-    ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
     ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT]](s16), [[SELECT1]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT2]](s16), [[TRUNC6]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT2]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir
index 6aff253..75ce080 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir
@@ -199,31 +199,24 @@ body: |
     ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
     ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
     ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32)
     ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
     ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
-    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32)
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
     ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]]
-    ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
-    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
-    ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
-    ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
-    ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32)
+    ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+    ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+    ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+    ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND7]](s32), [[AND8]](s32), [[AND9]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
index 4c4cd38..7758594 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
@@ -371,24 +371,16 @@ body: |
     ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]]
     ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]]
     ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32)
-    ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
     ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
-    ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
-    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
     ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]]
-    ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
-    ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL7]]
+    ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
     ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX8-LABEL: name: usubsat_v3s16
@@ -412,27 +404,19 @@ body: |
     ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]]
     ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]]
     ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]]
-    ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
-    ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
-    ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
     ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16)
     ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16)
     ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
     ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
-    ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16)
-    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
-    ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
     ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
-    ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
-    ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
-    ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
-    ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
-    ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
-    ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+    ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+    ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+    ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+    ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
     ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX9-LABEL: name: usubsat_v3s16
@@ -441,38 +425,28 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
     ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
     ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
     ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
     ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
     ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16)
-    ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[BUILD_VECTOR1]]
-    ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[BUILD_VECTOR]]
+    ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV1]], [[BUILD_VECTOR1]]
     ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+    ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
     ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+    ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
     ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
-    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
-    ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
-    ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
-    ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
-    ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
-    ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
-    ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
-    ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16)
-    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+    ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+    ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
     %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index a993afc..a635689 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -376,22 +376,17 @@ body: |
   bb.0:
     ; CHECK-LABEL: name: test_xor_v5s32
-    ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
-    ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32),
[[DEF]](s32) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[UV9]] - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[XOR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[XOR2]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF @@ -459,24 +454,12 @@ body: | ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: 
[[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 @@ -513,63 +496,62 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v5s16 - ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; 
CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]] - ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32) - ; CHECK-NEXT: 
[[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF @@ -585,13 +567,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF @@ -607,14 +586,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v4s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]] - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[UV3]], [[UV7]] + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32), [[XOR3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir index 0b34dff..bce7722 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -730,38 +730,31 @@ body: | ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[SHL3]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[C3]](s64) - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0 ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[COPY2]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[ANYEXT]] ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND2]](s64) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL4]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C1]] ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C1]] - ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C1]] - ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]] - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]] - ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) - ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), 
[[UV4]](s32)
- ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
- ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384)
; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112)
diff --git a/llvm/test/CodeGen/AMDGPU/absdiff.ll b/llvm/test/CodeGen/AMDGPU/absdiff.ll
new file mode 100644
index 0000000..9cb397f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/absdiff.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
+
+define amdgpu_ps i16 @absdiff_i16_false(i16 inreg %arg0, i16 inreg %arg1) {
+; CHECK-LABEL: absdiff_i16_false:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sub_i32 s0, s0, s1
+; CHECK-NEXT: s_sext_i32_i16 s1, s0
+; CHECK-NEXT: s_sub_i32 s0, 0, s0
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_max_i32 s0, s1, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %diff = sub i16 %arg0, %arg1
+ %res = call i16 @llvm.abs.i16(i16 %diff, i1 false) ; INT_MIN input returns INT_MIN
+ ret i16 %res
+}
+
+define amdgpu_ps i16 @absdiff_i16_true(i16 inreg %arg0, i16 inreg %arg1) {
+; CHECK-LABEL: absdiff_i16_true:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sub_i32 s0, s0, s1
+; CHECK-NEXT: s_sext_i32_i16 s1, s0
+; CHECK-NEXT: s_sub_i32 s0, 0, s0
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_max_i32 s0, s1, s0
+; CHECK-NEXT: ; return to shader part epilog
+ %diff = sub i16 %arg0, %arg1
+ %res = call i16 @llvm.abs.i16(i16 %diff, i1 true) ; INT_MIN input returns poison
+ ret i16 %res
+}
+
+define amdgpu_ps i32 @absdiff_i32_false(i32 inreg %arg0, i32 inreg %arg1) {
+; CHECK-LABEL: absdiff_i32_false:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_absdiff_i32 s0, s0, s1
+; CHECK-NEXT: ; return to shader part epilog
+ %diff = sub i32 %arg0, %arg1
+ %res = call i32 @llvm.abs.i32(i32 %diff, i1 false) ; INT_MIN input returns INT_MIN
+ ret i32 %res
+}
+
+define amdgpu_ps i32 @absdiff_i32_true(i32 inreg %arg0, i32 inreg %arg1) {
+; CHECK-LABEL: absdiff_i32_true:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_absdiff_i32 s0, s0, s1
+; CHECK-NEXT: ; return to shader part epilog
+ %diff = sub i32 %arg0, %arg1
+ %res = call i32 @llvm.abs.i32(i32 %diff, i1 true) ; INT_MIN input returns poison
+ ret i32 %res
+}
+
+; Multiple uses of %diff. No benefit to using s_absdiff_i32.
+define amdgpu_ps i32 @absdiff_i32_false_multi_use(i32 inreg %arg0, i32 inreg %arg1) {
+; CHECK-LABEL: absdiff_i32_false_multi_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sub_i32 s1, s0, s1
+; CHECK-NEXT: s_abs_i32 s0, s1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use s1
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ; return to shader part epilog
+ %diff = sub i32 %arg0, %arg1
+ %res = call i32 @llvm.abs.i32(i32 %diff, i1 false) ; INT_MIN input returns INT_MIN
+ call void asm "; use $0", "s"(i32 %diff)
+ ret i32 %res
+}
+
+define <2 x i32> @absdiff_2xi32_false(<2 x i32> %arg0, <2 x i32> %arg1) {
+; CHECK-LABEL: absdiff_2xi32_false:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_sub_u32_e32 v0, v0, v2
+; CHECK-NEXT: v_sub_u32_e32 v1, v1, v3
+; CHECK-NEXT: v_sub_u32_e32 v2, 0, v0
+; CHECK-NEXT: v_max_i32_e32 v0, v2, v0
+; CHECK-NEXT: v_sub_u32_e32 v2, 0, v1
+; CHECK-NEXT: v_max_i32_e32 v1, v2, v1
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %diff = sub <2 x i32> %arg0, %arg1
+ %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %diff, i1 false) ; INT_MIN input returns INT_MIN
+ ret <2 x i32> %res
+}
+
+define <4 x i32> @absdiff_4xi32_false(<4 x i32> %arg0, <4 x i32> %arg1) {
+; CHECK-LABEL: absdiff_4xi32_false:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_sub_u32_e32 v0, v0, v4
+; CHECK-NEXT: v_sub_u32_e32 v1, v1, v5
+; CHECK-NEXT: v_sub_u32_e32 v4, 0, v0
+; CHECK-NEXT: v_sub_u32_e32 v2, v2, v6
+; CHECK-NEXT: v_max_i32_e32 v0, v4, v0
+; CHECK-NEXT: v_sub_u32_e32 v4, 0, v1
+; CHECK-NEXT: v_sub_u32_e32 v3, v3, v7
+; CHECK-NEXT: v_max_i32_e32 v1, v4, v1
+; CHECK-NEXT: v_sub_u32_e32 v4, 0, v2
+; CHECK-NEXT: v_max_i32_e32 v2, v4, v2
+; CHECK-NEXT: v_sub_u32_e32 v4, 0, v3
+; CHECK-NEXT: v_max_i32_e32 v3, v4, v3
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %diff = sub <4 x i32> %arg0, %arg1
+ %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %diff, i1 false) ; INT_MIN input returns INT_MIN
+ ret <4 x i32> %res
+}
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 9db6d70..6a95881 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -3154,17 +3154,19 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
;
GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf @@ -3172,6 +3174,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1 @@ -3184,8 +3187,9 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2 ; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31 ; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1] @@ -3237,8 +3241,9 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5 ; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7 -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_u32 v6, vcc, s2, v8 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: s_mov_b32 s2, s6 ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v7, null, s3, v9, vcc ; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000 @@ -5856,6 +5861,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace ; GFX1164-NEXT: buffer_gl1_inv ; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[7:8] +; GFX1164-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164-NEXT: s_or_b64 s[10:11], vcc, s[10:11] ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[10:11] @@ -6384,6 +6390,7 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace( ; GFX1164-NEXT: buffer_gl1_inv ; GFX1164-NEXT: buffer_gl0_inv ; GFX1164-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[7:8] +; GFX1164-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164-NEXT: s_or_b64 s[12:13], vcc, s[12:13] ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[12:13] @@ -6983,6 +6990,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_ITERATIVE-NEXT: buffer_gl1_inv ; GFX1164_ITERATIVE-NEXT: buffer_gl0_inv ; GFX1164_ITERATIVE-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] +; GFX1164_ITERATIVE-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_ITERATIVE-NEXT: s_or_b64 s[12:13], 
vcc, s[12:13] ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 exec, exec, s[12:13] @@ -7669,17 +7677,19 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0 +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf @@ -7687,6 +7697,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1 @@ -7699,8 +7710,9 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2 ; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31 ; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1] @@ -7748,8 +7760,9 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_mov_b32_e32 v11, v7 ; GFX1164_DPP-NEXT: v_mov_b32_e32 v10, v6 -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1164_DPP-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_sub_co_u32 v8, vcc, v10, s8 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_subrev_co_ci_u32_e64 v9, null, s9, v11, vcc ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_mov_b32_e32 v6, v8 @@ -7761,6 +7774,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: buffer_gl1_inv ; GFX1164_DPP-NEXT: buffer_gl0_inv ; GFX1164_DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[10:11] +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: s_or_b64 s[12:13], vcc, s[12:13] ; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164_DPP-NEXT: s_and_not1_b64 exec, exec, s[12:13] @@ -7775,8 +7789,9 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5 ; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7 -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_sub_co_u32 v6, vcc, s2, v8 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: s_mov_b32 s2, -1 ; GFX1164_DPP-NEXT: v_sub_co_ci_u32_e64 v7, null, s3, v9, vcc ; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000 @@ -12770,6 +12785,7 @@ define amdgpu_kernel void @uniform_fadd_bf16(ptr addrspace(1) %result, ptr addrs ; GFX1164-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc ; GFX1164-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX1164-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX1164-TRUE16-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164-TRUE16-NEXT: v_mov_b32_e32 v1, v2 ; GFX1164-TRUE16-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX1164-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -12826,6 +12842,7 @@ define amdgpu_kernel void @uniform_fadd_bf16(ptr addrspace(1) %result, ptr addrs ; GFX1164-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc ; GFX1164-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX1164-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX1164-FAKE16-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164-FAKE16-NEXT: v_mov_b32_e32 v1, v2 ; GFX1164-FAKE16-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX1164-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -13804,9 +13821,10 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add ; GFX1164-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v2 ; GFX1164-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff ; GFX1164-TRUE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff -; GFX1164-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1164-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX1164-TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GFX1164-TRUE16-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX1164-TRUE16-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164-TRUE16-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc ; GFX1164-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1164-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h @@ -13815,6 +13833,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add ; GFX1164-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc ; GFX1164-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX1164-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX1164-TRUE16-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164-TRUE16-NEXT: v_mov_b32_e32 v1, v2 ; 
GFX1164-TRUE16-NEXT: s_or_b64 s[8:9], vcc, s[8:9] ; GFX1164-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -13863,6 +13882,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add ; GFX1164-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff ; GFX1164-FAKE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff ; GFX1164-FAKE16-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0 +; GFX1164-FAKE16-NEXT: s_waitcnt_depctr 0xf1ff ; GFX1164-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc ; GFX1164-FAKE16-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[0:1] @@ -13873,6 +13893,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add ; GFX1164-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc ; GFX1164-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX1164-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX1164-FAKE16-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164-FAKE16-NEXT: v_mov_b32_e32 v1, v2 ; GFX1164-FAKE16-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX1164-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 6167a84..08a4f0c 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -957,6 +957,7 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe ; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47 ; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63 ; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32 @@ -2640,17 +2641,19 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0 +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf @@ -2658,6 +2661,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) 
%out) { ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1 @@ -2670,13 +2674,15 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2 ; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31 ; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe ; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15 ; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v1 row_shr:1 row_mask:0xf bank_mask:0xf @@ -2701,6 +2707,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: v_writelane_b32 v6, s9, 48 ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7] ; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: s_mov_b32 s2, -1 ; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8 ; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc @@ -3288,23 +3295,26 @@ define amdgpu_kernel void @add_i64_varying_nouse() { ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_xmask:2 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_xmask:2 row_mask:0xf bank_mask:0xf ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_xmask:4 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0 +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_xmask:4 row_mask:0xf bank_mask:0xf -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_xmask:8 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: 
v_mov_b32_dpp v4, v1 row_xmask:8 row_mask:0xf bank_mask:0xf ; GFX1164_DPP-NEXT: v_permlanex16_b32 v3, v2, 0, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc ; GFX1164_DPP-NEXT: v_add_co_u32 v2, vcc, v2, v3 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, 0, 0 ; GFX1164_DPP-NEXT: v_permlane64_b32 v3, v2 @@ -3312,10 +3322,11 @@ define amdgpu_kernel void @add_i64_varying_nouse() { ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc ; GFX1164_DPP-NEXT: v_permlane64_b32 v4, v1 ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1] -; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX1164_DPP-NEXT: v_add_co_u32 v2, vcc, v2, v3 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffc ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v4, vcc ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, v2 @@ -4337,6 +4348,7 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe ; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47 ; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63 ; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32 @@ -6043,17 +6055,19 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0 +; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf -; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc ; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd ; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf @@ -6061,6 +6075,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, 
v1, v4, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1
@@ -6073,13 +6088,15 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -6104,6 +6121,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v6, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -6724,6 +6742,7 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -7441,6 +7460,7 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s3, v2, 15
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 31
@@ -8071,6 +8091,7 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -8787,6 +8808,7 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s3, v2, 15
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 31
@@ -9417,6 +9439,7 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -10133,6 +10156,7 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s3, v2, 15
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 31
@@ -10763,6 +10787,7 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -11041,6 +11066,7 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -11870,16 +11896,19 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v6, 1
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, 1
@@ -11887,8 +11916,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -11898,6 +11928,7 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, 1
@@ -11912,14 +11943,17 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v5, 1
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -11944,6 +11978,7 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -12577,6 +12612,7 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -12855,6 +12891,7 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -13684,16 +13721,19 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v6, -2
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, -1
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, -2
@@ -13701,8 +13741,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -13712,6 +13753,7 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, -2
@@ -13726,14 +13768,17 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v5, -2
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -13758,6 +13803,7 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -14391,6 +14437,7 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -14665,6 +14712,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -15489,16 +15537,19 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
@@ -15506,8 +15557,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -15517,6 +15569,7 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
@@ -15531,14 +15584,17 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -15563,6 +15619,7 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -16190,6 +16247,7 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -16465,6 +16523,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -17287,16 +17346,19 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v6, -1
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, -1
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
@@ -17304,8 +17366,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -17315,6 +17378,7 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
@@ -17329,14 +17393,17 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, -1
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -17361,6 +17428,7 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 0f59304..62f09de 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -515,6 +515,7 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: s_or_saveexec_b64 s[10:11], -1
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-NEXT: v_readlane_b32 s12, v1, 63
; GFX1164-NEXT: v_readlane_b32 s14, v1, 47
; GFX1164-NEXT: v_writelane_b32 v3, s13, 32
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 5959f76..f6149cf 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -1387,7 +1387,7 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
-; GFX11-NEXT: s_waitcnt_depctr 0xfffe
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_u32 s0, s2, s0
; GFX11-NEXT: s_addc_u32 s1, s3, s1
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
index b7ee9f7..65832f8 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
@@ -519,6 +519,7 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -649,6 +650,7 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -771,6 +773,7 @@ define amdgpu_ps void @global_max_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -885,6 +888,7 @@ define amdgpu_ps void @global_max_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1443,6 +1447,7 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -1573,6 +1578,7 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -1695,6 +1701,7 @@ define amdgpu_ps void @global_min_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1809,6 +1816,7 @@ define amdgpu_ps void @global_min_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2367,6 +2375,7 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -2497,6 +2506,7 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -2619,6 +2629,7 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn(ptr addrspace(1) inreg %sbase
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2733,6 +2744,7 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3291,6 +3303,7 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -3421,6 +3434,7 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -3543,6 +3557,7 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn(ptr addrspace(1) inreg %sbase
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3657,6 +3672,7 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 4581efc..ff4b658 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -963,14 +963,14 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB1_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -1996,14 +1996,14 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB3_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -3029,14 +3029,14 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB5_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -4248,10 +4248,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -5511,10 +5512,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -6774,10 +6776,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index bd570d9..15d4b9a 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -963,14 +963,14 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_min_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB1_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -1996,14 +1996,14 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_min_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB3_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -3029,14 +3029,14 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_min_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB5_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -4248,10 +4248,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -5511,10 +5512,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -6774,10 +6776,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
index ec940f8..2f6e986 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
@@ -8,16 +8,14 @@ body: |
; CHECK-LABEL: name: test_struct
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<3 x s32>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
- ; CHECK-NEXT: $vgpr3 = COPY [[UV2]](s32)
- ; CHECK-NEXT: $vgpr4_vgpr5 = COPY [[DEF2]](s64)
- ; CHECK-NEXT: $vgpr6 = COPY [[DEF3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr3 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr4_vgpr5 = COPY [[DEF1]](s64)
+ ; CHECK-NEXT: $vgpr6 = COPY [[DEF2]](<2 x s16>)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%0:_(s32), %1:_(<3 x s32>), %2:_(s64), %3:_(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.dead)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 0a2e7af..8103140 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -1185,6 +1185,7 @@ define amdgpu_ps void @fcmp_x2(float %a) #0 {
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0x3e800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB21_1
; GFX11-NEXT: s_endpgm
@@ -1595,6 +1596,7 @@ define amdgpu_ps void @kill_with_loop_exit(float inreg %inp0, float inreg %inp1,
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX11-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xf1ff
; GFX11-NEXT: .LBB26_2: ; %bb
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: v_add_f32_e32 v0, 0x3e800000, v0
diff --git a/llvm/test/CodeGen/AMDGPU/loop-vector-sink.ll b/llvm/test/CodeGen/AMDGPU/loop-vector-sink.ll
new file mode 100644
index 0000000..670e2c5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/loop-vector-sink.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefix=OPT %s
+
+; testing insert case
+define amdgpu_kernel void @runningSum(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %inputElement1, i32 %inputIter) {
+; OPT-LABEL: define amdgpu_kernel void @runningSum(
+; OPT-SAME: ptr addrspace(1) [[OUT0:%.*]], ptr addrspace(1) [[OUT1:%.*]], i32 [[INPUTELEMENT1:%.*]], i32 [[INPUTITER:%.*]]) #[[ATTR0:[0-9]+]] {
+; OPT-NEXT: [[PREHEADER:.*]]:
+; OPT-NEXT: [[VECELEMENT1:%.*]] = insertelement <2 x i32> poison, i32 [[INPUTELEMENT1]], i64 0
+; OPT-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VECELEMENT1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; OPT-NEXT: br label %[[LOOPBODY:.*]]
+; OPT: [[LOOPBODY]]:
+; OPT-NEXT: [[PREVIOUSSUM:%.*]] = phi <2 x i32> [ [[TMP1]], %[[PREHEADER]] ], [ [[RUNNINGSUM:%.*]], %[[LOOPBODY]] ]
+; OPT-NEXT: [[ITERCOUNT:%.*]] = phi i32 [ [[INPUTITER]], %[[PREHEADER]] ], [ [[ITERSLEFT:%.*]], %[[LOOPBODY]] ]
+; OPT-NEXT: [[RUNNINGSUM]] = add <2 x i32> [[TMP1]], [[PREVIOUSSUM]]
+; OPT-NEXT: [[ITERSLEFT]] = sub i32 [[ITERCOUNT]], 1
+; OPT-NEXT: [[COND:%.*]] = icmp eq i32 [[ITERSLEFT]], 0
+; OPT-NEXT: br i1 [[COND]], label %[[LOOPEXIT:.*]], label %[[LOOPBODY]]
+; OPT: [[LOOPEXIT]]:
+; OPT-NEXT: [[SUMELEMENT0:%.*]] = extractelement <2 x i32> [[RUNNINGSUM]], i64 0
+; OPT-NEXT: [[SUMELEMENT1:%.*]] = extractelement <2 x i32> [[RUNNINGSUM]], i64 1
+; OPT-NEXT: store i32 [[SUMELEMENT0]], ptr addrspace(1) [[OUT0]], align 4
+; OPT-NEXT: store i32 [[SUMELEMENT1]], ptr addrspace(1) [[OUT1]], align 4
+; OPT-NEXT: ret void
+;
+preheader:
+ %vecElement1 = insertelement <2 x i32> poison, i32 %inputElement1, i64 0
+ %broadcast1 = shufflevector <2 x i32> %vecElement1, <2 x i32> poison, <2 x i32> zeroinitializer
+ br label %loopBody
+
+loopBody:
+ %previousSum = phi <2 x i32> [ %broadcast1, %preheader ], [ %runningSum, %loopBody ]
+ %iterCount = phi i32 [ %inputIter, %preheader ], [ %itersLeft, %loopBody ]
+ %runningSum = add <2 x i32> %broadcast1, %previousSum
+ %itersLeft = sub i32 %iterCount, 1
+ %cond = icmp eq i32 %itersLeft, 0
+ br i1 %cond, label %loopExit, label %loopBody
+
+loopExit:
+ %sumElement0 = extractelement <2 x i32> %runningSum, i64 0
+ %sumElement1 = extractelement <2 x i32> %runningSum, i64 1
+ store i32 %sumElement0, ptr addrspace(1) %out0
+ store i32 %sumElement1, ptr addrspace(1) %out1
+ ret void
+}
+
+; testing extract case with single use - with divergent control flow
+; The vector has SINGLE use (extractelement), both sink into if.then
+define amdgpu_kernel void @test_sink_extract_single_use_operands(ptr addrspace(1) %out0, <2 x i32> %inputVec, i32 %tid, i32 %cond) {
+; OPT-LABEL: define amdgpu_kernel void @test_sink_extract_single_use_operands(
+; OPT-SAME: ptr addrspace(1) [[OUT0:%.*]], <2 x i32> [[INPUTVEC:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: [[RUNNINGSUM:%.*]] = add <2 x i32> [[INPUTVEC]], splat (i32 1)
+; OPT-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[RUNNINGSUM]], i64 0
+; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]]
+; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; OPT: [[IF_THEN]]:
+; OPT-NEXT: [[RESULT:%.*]] = add i32 [[TMP0]], 100
+; OPT-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT0]], align 4
+; OPT-NEXT: br label %[[IF_END]]
+; OPT: [[IF_END]]:
+; OPT-NEXT: ret void
+;
+entry:
+ %runningSum = add <2 x i32> %inputVec, <i32 1, i32 1>
+ %sumElement0 = extractelement <2 x i32> %runningSum, i64 0
+ %cmp = icmp slt i32 %tid, %cond
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %result = add i32 %sumElement0, 100
+ store i32 %result, ptr addrspace(1) %out0
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; testing extract case - extracting two elements with divergent control flow
+; The vector has TWO uses (two extractelements), all sink into if.then
+define amdgpu_kernel void @test_sink_extract_operands(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %input_vec, i32 %tid, i32 %cond) {
+; OPT-LABEL: define amdgpu_kernel void @test_sink_extract_operands(
+; OPT-SAME: ptr addrspace(1) [[OUT0:%.*]], ptr addrspace(1) [[OUT1:%.*]], <4 x i32> [[INPUT_VEC:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: [[VEC_FULL:%.*]] = add <4 x i32> [[INPUT_VEC]], <i32 42, i32 43, i32 44, i32 45>
+; OPT-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[VEC_FULL]], i64 0
+; OPT-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[VEC_FULL]], i64 1
+; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]]
+; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; OPT: [[IF_THEN]]:
+; OPT-NEXT: [[RESULT0:%.*]] = add i32 [[TMP0]], 100
+; OPT-NEXT: [[RESULT1:%.*]] = add i32 [[TMP1]], 200
+; OPT-NEXT: store i32 [[RESULT0]], ptr addrspace(1) [[OUT0]], align 4
+; OPT-NEXT: store i32 [[RESULT1]], ptr addrspace(1) [[OUT1]], align 4
+; OPT-NEXT: br label %[[IF_END]]
+; OPT: [[IF_END]]:
+; OPT-NEXT: ret void
+;
+entry:
+ %vec_full = add <4 x i32> %input_vec, <i32 42, i32 43, i32 44, i32 45>
+ %extract0 = extractelement <4 x i32> %vec_full, i64 0
+ %extract1 = extractelement <4 x i32> %vec_full, i64 1
+ %cmp = icmp slt i32 %tid, %cond
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %result0 = add i32 %extract0, 100
+ %result1 = add i32 %extract1, 200
+ store i32 %result0, ptr addrspace(1) %out0
+ store i32 %result1, ptr addrspace(1) %out1
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; testing shuffle case with divergent control flow - shuffles sink into if.then
+define amdgpu_kernel void @test_shuffle_insert_subvector(ptr addrspace(1) %ptr, <4 x i16> %vec1, <4 x i16> %vec2, i32 %tid, i32 %cond) {
+; OPT-LABEL: define amdgpu_kernel void @test_shuffle_insert_subvector(
+; OPT-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x i16> [[VEC1:%.*]], <4 x i16> [[VEC2:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC1]], <4 x i16> [[VEC2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; OPT-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i16> [[VEC1]], <4 x i16> [[VEC2]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; OPT-NEXT: [[SHUFFLE3:%.*]] = shufflevector <4 x i16> [[VEC1]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; OPT-NEXT: [[SHUFFLE4:%.*]] = shufflevector <4 x i16> [[VEC2]], <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; OPT-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x i16> [[SHUFFLE]], <4 x i16> [[SHUFFLE2]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]]
+; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; OPT: [[IF_THEN]]:
+; OPT-NEXT: [[RESULT_VEC:%.*]] = add <4 x i16> [[SHUFFLE5]], <i16 100, i16 200, i16 300, i16 400>
+; OPT-NEXT: [[OTHER_RESULT:%.*]] = mul <4 x i16> [[SHUFFLE3]], splat (i16 2)
+; OPT-NEXT: [[MORE_RESULT:%.*]] = sub <4 x i16> [[SHUFFLE4]], splat (i16 5)
+; OPT-NEXT: store <4 x i16> [[RESULT_VEC]], ptr addrspace(1) [[PTR]], align 8
+; OPT-NEXT: store <4 x i16> [[OTHER_RESULT]], ptr addrspace(1) [[PTR]], align 8
+; OPT-NEXT: store <4 x i16> [[MORE_RESULT]], ptr addrspace(1) [[PTR]], align 8
+; OPT-NEXT: br label %[[IF_END]]
+; OPT: [[IF_END]]:
+; OPT-NEXT: ret void
+;
+entry:
+ %shuffle = shufflevector <4 x i16> %vec1, <4 x i16> %vec2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ %shuffle2 = shufflevector <4 x i16> %vec1, <4 x i16> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+ %shuffle3 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %shuffle4 = shufflevector <4 x i16> %vec2, <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %shuffle5 = shufflevector <4 x i16> %shuffle, <4 x i16> %shuffle2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %cmp = icmp slt i32 %tid, %cond
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %result_vec = add <4 x i16> %shuffle5, <i16 100, i16 200, i16 300, i16 400>
+ %other_result = mul <4 x i16> %shuffle3, <i16 2, i16 2, i16 2, i16 2>
+ %more_result = sub <4 x i16> %shuffle4, <i16 5, i16 5, i16 5, i16 5>
+ store <4 x i16> %result_vec, ptr addrspace(1) %ptr
+ store <4 x i16> %other_result, ptr addrspace(1) %ptr
+ store <4 x i16> %more_result, ptr addrspace(1) %ptr
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; testing shuffle extract subvector with divergent control flow - shuffles sink into if.then
+define amdgpu_kernel void @test_shuffle_extract_subvector(ptr addrspace(1) %ptr, <4 x i16> %input_vec, i32 %tid, i32 %cond) {
+; OPT-LABEL: define amdgpu_kernel void @test_shuffle_extract_subvector(
+; OPT-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x i16> [[INPUT_VEC:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[INPUT_VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; OPT-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i16> [[INPUT_VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; OPT-NEXT: [[SHUFFLE3:%.*]] = shufflevector <4 x i16> [[INPUT_VEC]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]]
+; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; OPT: [[IF_THEN]]:
+; OPT-NEXT: [[RESULT_VEC:%.*]] = add <2 x i16> [[SHUFFLE]], <i16 100, i16 200>
+; OPT-NEXT: [[RESULT_VEC2:%.*]] = mul <2 x i16> [[SHUFFLE2]], splat (i16 3)
+; OPT-NEXT: [[RESULT_VEC3:%.*]] = sub <4 x i16> [[SHUFFLE3]], splat (i16 10)
+; OPT-NEXT: store <2 x i16> [[RESULT_VEC]], ptr addrspace(1) [[PTR]], align 4
+; OPT-NEXT: store <2 x i16> [[RESULT_VEC2]], ptr addrspace(1) [[PTR]], align 4
+; OPT-NEXT: store <4 x i16> [[RESULT_VEC3]], ptr addrspace(1) [[PTR]], align 8
+; OPT-NEXT: br label %[[IF_END]]
+; OPT: [[IF_END]]:
+; OPT-NEXT: ret void
+;
+entry:
+ %shuffle = shufflevector <4 x i16> %input_vec, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+ %shuffle2 = shufflevector <4 x i16> %input_vec, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+ %shuffle3 = shufflevector <4 x i16> %input_vec, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %cmp = icmp slt i32 %tid, %cond
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %result_vec = add <2 x i16> %shuffle, <i16 100, i16 200>
+ %result_vec2 = mul <2 x i16> %shuffle2, <i16 3, i16 3>
+ %result_vec3 = sub <4 x i16> %shuffle3, <i16 10, i16 10, i16 10, i16 10>
+ store <2 x i16> %result_vec, ptr addrspace(1) %ptr
+ store <2 x i16> %result_vec2, ptr addrspace(1) %ptr
+ store <4 x i16> %result_vec3, ptr addrspace(1) %ptr
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; testing shuffle sink with widening operations and divergent control flow
+define amdgpu_kernel void @test_shuffle_sink_operands(ptr addrspace(1) %ptr, <2 x i16> %input_vec, <2 x i16> %input_vec2, i32 %tid, i32 %cond) {
+; OPT-LABEL: define amdgpu_kernel void @test_shuffle_sink_operands(
+; OPT-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x i16> [[INPUT_VEC:%.*]], <2 x i16> [[INPUT_VEC2:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
+; OPT-NEXT: [[ENTRY:.*:]]
+; OPT-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[INPUT_VEC]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; OPT-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x i16> [[INPUT_VEC2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]]
+; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; OPT: [[IF_THEN]]:
+; OPT-NEXT: [[RESULT_VEC:%.*]] = add <4 x i16> [[SHUFFLE]], <i16 100, i16 200, i16 300, i16 400>
+; OPT-NEXT: [[RESULT_VEC2:%.*]] = mul <4 x i16> [[SHUFFLE2]], splat (i16 5)
+; OPT-NEXT: store <4 x i16> [[RESULT_VEC]], ptr addrspace(1) [[PTR]], align 8
+; OPT-NEXT: store <4 x i16> [[RESULT_VEC2]], ptr addrspace(1) [[PTR]], align 8
+; OPT-NEXT: br label %[[IF_END]]
+; OPT: [[IF_END]]:
+; OPT-NEXT: ret void
+;
+entry:
+ %shuffle = shufflevector <2 x i16> %input_vec, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+ %shuffle2 = shufflevector <2 x i16> %input_vec2, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+ %cmp = icmp slt i32 %tid, %cond
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %result_vec = add <4 x i16> %shuffle, <i16 100, i16 200, i16 300, i16 400>
+ %result_vec2 = mul <4 x i16> %shuffle2, <i16 5, i16 5, i16 5, i16 5>
+ store <4 x i16> %result_vec, ptr addrspace(1) %ptr
+ store <4 x i16> %result_vec2, ptr addrspace(1) %ptr
+ br label %if.end
+
+if.end:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll b/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll
index 4205393..97c2bdc 100644
--- a/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll
+++ b/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll
@@ -13,6 +13,7 @@ define amdgpu_ps i64 @test_nor(i64 inreg %a, i64 inreg %b) {
; SDAG-W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-W64-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SDAG-W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v0
+; SDAG-W64-NEXT: s_waitcnt_depctr 0xf1ff
; SDAG-W64-NEXT: ; return to shader part epilog
;
; GISEL-W64-LABEL: test_nor:
@@ -57,8 +58,8 @@ define amdgpu_ps i64 @test_or_two_uses(i64 inreg %a, i64 inreg %b) {
; SDAG-W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SDAG-W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; SDAG-W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1
+; SDAG-W64-NEXT: s_waitcnt_depctr 0xf1ff
; SDAG-W64-NEXT: s_and_b64 s[0:1], s[0:1], vcc
-; SDAG-W64-NEXT: s_waitcnt_depctr 0xfffe
; SDAG-W64-NEXT: ; return to shader part epilog
;
; GISEL-W64-LABEL: test_or_two_uses:
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
index 8eddc9a..c9208bf 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
@@ -73,7 +73,7 @@ body: |
; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
- ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
+ ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = IMPLICIT_DEF
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -112,7 +112,7 @@ body: |
; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s64) into %stack.0, align 4, addrspace 5)
; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
- ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (load (s64) from %stack.0, align 4, addrspace 5)
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
@@ -157,7 +157,7 @@ body: |
; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
- ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
+ ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
@@ -203,7 +203,7 @@ body: |
; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
- ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s64) from %stack.0, align 4, addrspace 5)
; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
@@ -255,7 +255,7 @@ body: |
; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
- ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
+ ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A-NEXT: S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll
index dd5f838..0166d7a 100644
--- a/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll
@@ -110,6 +110,21 @@ define amdgpu_ps i32 @abs32(i32 inreg %val0) {
ret i32 %zext
}
+define amdgpu_ps i32 @absdiff32(i32 inreg %val0, i32 inreg %val1) {
+; CHECK-LABEL: absdiff32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_absdiff_i32 s0, s0, s1
+; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: ; return to shader part epilog
+ %diff = sub i32 %val0, %val1
+ %result = call i32 @llvm.abs.i32(i32 %diff, i1 false)
+ %cmp = icmp ne i32 %result, 0
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+}
+
define amdgpu_ps i32 @and32(i32 inreg %val0, i32 inreg %val1) {
; CHECK-LABEL: and32:
; CHECK: ; %bb.0:
@@ -608,14 +623,14 @@ define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB35_2
+; CHECK-NEXT: s_cbranch_scc0 .LBB36_2
; CHECK-NEXT: ; %bb.1: ; %endif
; CHECK-NEXT: s_mov_b32 s0, 1
-; CHECK-NEXT: s_branch .LBB35_3
-; CHECK-NEXT: .LBB35_2: ; %if
+; CHECK-NEXT: s_branch .LBB36_3
+; CHECK-NEXT: .LBB36_2: ; %if
; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_branch .LBB35_3
-; CHECK-NEXT: .LBB35_3:
+; CHECK-NEXT: s_branch .LBB36_3
+; CHECK-NEXT: .LBB36_3:
%cmp = icmp ne ptr addrspace(4) @1, null
br i1 %cmp, label %endif, label %if
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index b21c781..5461532 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1239,6 +1239,7 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x, i32 inreg %y) #0
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB11_6
; GFX11-NEXT: ; %bb.1: ; %bb
@@ -2066,6 +2067,7 @@ define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB17_6
; GFX11-NEXT: ; %bb.1: ; %bb
diff --git a/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
new file mode 100644
index 0000000..bb87b6e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
@@ -0,0 +1,324 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx950 -run-pass prologepilog,machine-cp -o - %s | FileCheck -check-prefix=GFX950 %s
+
+--- |
+ define amdgpu_kernel void @full_copy() #0 { ret void }
+
+ define amdgpu_kernel void @partial_copy() #0 { ret void }
+
+ define amdgpu_kernel void @full_spill() #0 { ret void }
+
+ attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+...
+
+---
+name: full_copy
+tracksRegLiveness: true
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
+ - { id: 1, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
+ - { id: 2, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
+ - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
+ - { id: 4, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
+ - { id: 5, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
+ - { id: 6, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+ hasSpilledVGPRs: true
+body: |
+ bb.0:
+ ; GFX950-LABEL: name: full_copy
+ ; GFX950: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: renamable $agpr0_agpr1 = IMPLICIT_DEF
+ ; GFX950-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
+ ; GFX950-NEXT: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF
+ ; GFX950-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX950-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX950-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX950-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
+ ; GFX950-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX950-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX950-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX950-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX950-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX950-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX950-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX950-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX950-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit-def $vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX950-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX950-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX950-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX950-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr16_vgpr17_vgpr18_vgpr19
+ ;
GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr8, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr9, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr10, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr11, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr12, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr13, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr14, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr15, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr16, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr17, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr18, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr19, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr20, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr21, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr22, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr23, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr26, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr27, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr28, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr29, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + ; GFX950-NEXT: S_ENDPGM 0 + renamable $agpr0_agpr1 = IMPLICIT_DEF + renamable 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr4_vgpr5_vgpr6_vgpr7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr8_vgpr9_vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr12_vgpr13_vgpr14_vgpr15, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr20_vgpr21_vgpr22_vgpr23, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + $vgpr0 = IMPLICIT_DEF + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + S_ENDPGM 0 +... + + +# We need to add an implicit operand as well as an implicit-def operand to the scratch_load; otherwise, MachineCopyPropagation will think the preceding copies are dead and will delete them.
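+# For illustration only (not a checked test), compare the two operand forms on a partial restore of $agpr2_agpr3_agpr4_agpr5; both lines mirror the partial_copy checks below, with "..." standing in for the unchanged implicit $exec, implicit $flat_scr operands:
+#
+# Only implicit-def: the load appears to fully redefine the tuple, so the copies that refilled $agpr4/$agpr5 look dead to MachineCopyPropagation:
+#   $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, ..., implicit-def $agpr2_agpr3_agpr4_agpr5
+#
+# implicit-def plus implicit use: the old value of the tuple is read, so the preceding copies stay live:
+#   $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, ..., implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr2_agpr3_agpr4_agpr5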
+ +--- +name: partial_copy +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 4, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 5, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 6, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + hasSpilledVGPRs: true +body: | + bb.0: + ; GFX950-LABEL: name: partial_copy + ; GFX950: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27 + ; GFX950-NEXT: {{ $}} + ; GFX950-NEXT: renamable $agpr0_agpr1 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit-def $vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit-def 
$vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr20_vgpr21, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 :: (store (s64) into %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr8, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr9, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr10, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr11, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr12, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr13, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr14, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr15, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr16, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr17, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr18, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr19, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr20, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr21, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr22, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr23, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr26, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr27, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr2_agpr3_agpr4_agpr5 :: (load (s64) from %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable 
$agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + ; GFX950-NEXT: S_ENDPGM 0 + renamable $agpr0_agpr1 = IMPLICIT_DEF + renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr4_vgpr5_vgpr6_vgpr7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr8_vgpr9_vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr12_vgpr13_vgpr14_vgpr15, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr20_vgpr21_vgpr22_vgpr23, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + $vgpr0 = IMPLICIT_DEF + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + S_ENDPGM 0 +... + +# Since there are no preceding copies, we do not need to add an implicit operand: there is nothing for the implicit-def operand to clobber.
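+# For illustration only (not a checked test): when the restore is full-width, as in the full_spill checks below, no extra implicit operands are attached at all:
+#   $agpr2_agpr3_agpr4_agpr5 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr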
+ +--- +name: full_spill +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 4, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 5, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 6, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + hasSpilledVGPRs: true +body: | + bb.0: + ; GFX950-LABEL: name: full_spill + ; GFX950: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25 + ; GFX950-NEXT: {{ $}} + ; GFX950-NEXT: renamable $agpr0_agpr1 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $agpr26_agpr27 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit-def $vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit 
$exec, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr8, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr9, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr10, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr11, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr12, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr13, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr14, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr15, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr16, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr17, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr18, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr19, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr20, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr21, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr22, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr23, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + ; GFX950-NEXT: $agpr2_agpr3_agpr4_agpr5 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + ; GFX950-NEXT: S_ENDPGM 0 + renamable $agpr0_agpr1 = IMPLICIT_DEF + renamable $agpr26_agpr27 = IMPLICIT_DEF + renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, 
$sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr4_vgpr5_vgpr6_vgpr7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr8_vgpr9_vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr12_vgpr13_vgpr14_vgpr15, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr20_vgpr21_vgpr22_vgpr23, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + $vgpr0 = IMPLICIT_DEF + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir index 52593e0..beeb9b2 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir @@ -19,7 +19,7 @@ body: | ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29, implicit $agpr30 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -46,7 +46,7 @@ body: | ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -75,7 +75,7 @@ body: | ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR 
$sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -129,7 +129,7 @@ body: | ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53, implicit $vgpr54 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -156,7 +156,7 @@ body: | ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -185,7 +185,7 @@ body: | ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir index 1eabe62..e1d3ebc 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir @@ -38,6 +38,22 @@ define amdgpu_gs void @mask_hazard_no_hazard1() { ret void } define amdgpu_gs void @mask_hazard_no_hazard2() { ret void } define amdgpu_gs void @mask_hazard_no_hazard3() { ret void } + define amdgpu_gs void @mask_hazard_cancel_hazard1() { ret void } + define amdgpu_gs void @mask_hazard_cancel_hazard2() { ret void } + define amdgpu_gs void @mask_hazard_cancel_hazard3() { ret void } + define amdgpu_gs void @mask_hazard_cancel_hazard4() { ret void } + define amdgpu_gs void @mask_hazard_partial_cancel1() { ret void } + define amdgpu_gs void @mask_hazard_partial_cancel2() { ret void } + define amdgpu_gs void @mask_hazard_partial_cancel3() { ret void } + define amdgpu_gs void @mask_hazard_partial_cancel4() { ret void } + define amdgpu_gs void @mask_hazard_valu_readlane1() { ret void } + define amdgpu_gs void @mask_hazard_valu_readlane2() { ret void } + define amdgpu_gs void @mask_hazard_valu_readlane3() { ret void } + define amdgpu_gs void @mask_hazard_valu_readfirstlane() { ret void } + define amdgpu_gs void @mask_hazard_valu_vcmp_vcc() { ret void } + define amdgpu_gs void @mask_hazard_valu_vcmp_sgpr() { ret void } + 
define amdgpu_gs void @mask_hazard_combine1() { ret void } + define amdgpu_gs void @mask_hazard_combine2() { ret void } ... --- @@ -487,8 +503,8 @@ body: | ; GFX11-LABEL: name: mask_hazard_subreg3 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subreg3 @@ -655,3 +671,373 @@ body: | $vgpr2 = V_MOV_B32_e32 0, implicit $exec S_ENDPGM 0 ... + +--- +name: mask_hazard_cancel_hazard1 +body: | + bb.0: + ; GCN-LABEL: name: mask_hazard_cancel_hazard1 + ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GCN-NEXT: $vcc_lo = S_MOV_B32 0 + ; GCN-NEXT: $vcc_hi = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo + ; GCN-NEXT: $vcc = S_MOV_B64 1 + ; GCN-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vcc_lo = S_MOV_B32 0 + $vcc_hi = S_MOV_B32 0 + $sgpr0 = S_MOV_B32 $vcc_lo + $vcc = S_MOV_B64 1 + S_ENDPGM 0 +... + +--- +name: mask_hazard_cancel_hazard2 +body: | + bb.0: + ; GCN-LABEL: name: mask_hazard_cancel_hazard2 + ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GCN-NEXT: $vcc = S_MOV_B64 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo + ; GCN-NEXT: $vcc = S_MOV_B64 1 + ; GCN-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vcc = S_MOV_B64 0 + $sgpr0 = S_MOV_B32 $vcc_lo + $vcc = S_MOV_B64 1 + S_ENDPGM 0 +... + +--- +name: mask_hazard_cancel_hazard3 +body: | + bb.0: + ; GCN-LABEL: name: mask_hazard_cancel_hazard3 + ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 + ; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0 + ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 + ; GCN-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + $sgpr0_sgpr1 = S_MOV_B64 0 + $sgpr4 = S_MOV_B32 $sgpr0 + $sgpr0_sgpr1 = S_MOV_B64 1 + S_ENDPGM 0 +... + +--- +name: mask_hazard_cancel_hazard4 +body: | + bb.0: + ; GCN-LABEL: name: mask_hazard_cancel_hazard4 + ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 + ; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0 + ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 + ; GCN-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + $sgpr0 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 0 + $sgpr4 = S_MOV_B32 $sgpr0 + $sgpr0_sgpr1 = S_MOV_B64 1 + S_ENDPGM 0 +... 
+ +--- +name: mask_hazard_partial_cancel1 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_partial_cancel1 + ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX11-NEXT: $vcc_lo = S_MOV_B32 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo + ; GFX11-NEXT: $vcc = S_MOV_B64 1 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_partial_cancel1 + ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX12-NEXT: $vcc_lo = S_MOV_B32 0 + ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo + ; GFX12-NEXT: $vcc = S_MOV_B64 1 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vcc_lo = S_MOV_B32 0 + $sgpr0 = S_MOV_B32 $vcc_lo + $vcc = S_MOV_B64 1 + S_ENDPGM 0 +... + +--- +name: mask_hazard_partial_cancel2 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_partial_cancel2 + ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX11-NEXT: $vcc_hi = S_MOV_B32 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo + ; GFX11-NEXT: $vcc = S_MOV_B64 1 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_partial_cancel2 + ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX12-NEXT: $vcc_hi = S_MOV_B32 0 + ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo + ; GFX12-NEXT: $vcc = S_MOV_B64 1 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vcc_hi = S_MOV_B32 0 + $sgpr0 = S_MOV_B32 $vcc_lo + $vcc = S_MOV_B64 1 + S_ENDPGM 0 +... + +--- +name: mask_hazard_partial_cancel3 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_partial_cancel3 + ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr0 + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_partial_cancel3 + ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr0 + ; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + $sgpr0 = S_MOV_B32 0 + $sgpr3 = S_MOV_B32 $sgpr0 + $sgpr0_sgpr1 = S_MOV_B64 1 + S_ENDPGM 0 +... 
+ +--- +name: mask_hazard_partial_cancel4 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_partial_cancel4 + ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr1 + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_partial_cancel4 + ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0 + ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534 + ; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr1 + ; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + $sgpr1 = S_MOV_B32 0 + $sgpr3 = S_MOV_B32 $sgpr1 + $sgpr0_sgpr1 = S_MOV_B64 1 + S_ENDPGM 0 +... + +--- +name: mask_hazard_valu_readlane1 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_valu_readlane1 + ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_valu_readlane1 + ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX12-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + $sgpr2 = V_READLANE_B32 $vgpr3, 0 + S_ENDPGM 0 +... + +--- +name: mask_hazard_valu_readlane2 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_valu_readlane2 + ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_valu_readlane2 + ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX12-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + $sgpr3 = V_READLANE_B32 $vgpr3, 1 + S_ENDPGM 0 +... + +--- +name: mask_hazard_valu_readlane3 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_valu_readlane3 + ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0 + ; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_valu_readlane3 + ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX12-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0 + ; GFX12-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + $sgpr2 = V_READLANE_B32 $vgpr3, 0 + $sgpr3 = V_READLANE_B32 $vgpr3, 1 + S_ENDPGM 0 +... 
+ +--- +name: mask_hazard_valu_readfirstlane +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_valu_readfirstlane + ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX11-NEXT: $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_valu_readfirstlane + ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX12-NEXT: $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec + S_ENDPGM 0 +... + +--- +name: mask_hazard_valu_vcmp_vcc +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_valu_vcmp_vcc + ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65533 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_valu_vcmp_vcc + ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + S_ENDPGM 0 +... + +--- +name: mask_hazard_valu_vcmp_sgpr +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_valu_vcmp_sgpr + ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_valu_vcmp_sgpr + ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: mask_hazard_combine1 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_combine1 + ; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0 + ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 61948 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_combine1 + ; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + $sgpr0 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 0 + $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + S_ENDPGM 0 +... + +--- +name: mask_hazard_combine2 +body: | + bb.0: + ; GFX11-LABEL: name: mask_hazard_combine2 + ; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX11-NEXT: S_WAITCNT_DEPCTR 65532 + ; GFX11-NEXT: $sgpr1 = S_MOV_B32 $sgpr4 + ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + ; GFX11-NEXT: S_WAITCNT_DEPCTR 61950 + ; GFX11-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: mask_hazard_combine2 + ; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + ; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + ; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + ; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0 + ; GFX12-NEXT: $sgpr1 = S_MOV_B32 $sgpr4 + ; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec + $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec + $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec + V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec + $sgpr0 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 $sgpr4 + $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
index 2fac3d2..69cf924 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
+++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
@@ -22,7 +22,7 @@ body: |
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -49,7 +49,7 @@ body: |
     ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr50 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -78,7 +78,7 @@ body: |
     ; GCN-NEXT: $vgpr51 = COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr50 = COPY $vgpr54, implicit $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr49 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -132,7 +132,7 @@ body: |
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -159,7 +159,7 @@ body: |
     ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr28 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -188,7 +188,7 @@ body: |
     ; GCN-NEXT: $agpr29 = COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr28 = COPY $agpr30, implicit $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr27 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
index 2d7a91f..985bcbd 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
@@ -1,22 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
 
 define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-; GFX12-LABEL: intrinsic_store_system_scope:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
-; GFX12-NEXT:    s_endpgm
+; GFX1200-LABEL: intrinsic_store_system_scope:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: intrinsic_store_system_scope:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT:    buffer_store_b32 v0, v[2:3], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: intrinsic_store_system_scope:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT:    buffer_store_b32 v0, v[4:5], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT:    s_endpgm
   call void @llvm.amdgcn.struct.buffer.store.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 24)
   ret void
 }
 
 define amdgpu_ps void @generic_store_volatile(i32 %val, ptr addrspace(1) %out) {
-; GFX12-LABEL: generic_store_volatile:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
-; GFX12-NEXT:    s_wait_storecnt 0x0
-; GFX12-NEXT:    s_endpgm
+; GFX1200-LABEL: generic_store_volatile:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
+; GFX1200-NEXT:    s_wait_storecnt 0x0
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: generic_store_volatile:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT:    global_store_b32 v[2:3], v0, off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_storecnt 0x0
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: generic_store_volatile:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT:    global_store_b32 v[4:5], v0, off scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT:    s_wait_storecnt 0x0
+; GFX1250-GISEL-NEXT:    s_endpgm
   store volatile i32 %val, ptr addrspace(1) %out
   ret void
 }
