diff options
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir | 131 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll | 38 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/ctlz.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/cttz.ll | 2 |
7 files changed, 162 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir new file mode 100644 index 0000000..0f43612 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir @@ -0,0 +1,131 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRE +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-POST + +--- +name: test_combine_trunc_select +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_trunc_select + ; CHECK-PRE: %cond:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: %lhs:_(s64) = COPY $x0 + ; CHECK-PRE-NEXT: %rhs:_(s64) = COPY $x0 + ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64) + ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %rhs(s64) + ; CHECK-PRE-NEXT: %small:_(s32) = G_SELECT %cond(s32), [[TRUNC]], [[TRUNC1]] + ; CHECK-PRE-NEXT: $w0 = COPY %small(s32) + ; + ; CHECK-POST-LABEL: name: test_combine_trunc_select + ; CHECK-POST: %cond:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: %lhs:_(s64) = COPY $x0 + ; CHECK-POST-NEXT: %rhs:_(s64) = COPY $x0 + ; CHECK-POST-NEXT: %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs + ; CHECK-POST-NEXT: %small:_(s32) = G_TRUNC %res(s64) + ; CHECK-POST-NEXT: $w0 = COPY %small(s32) + %cond:_(s32) = COPY $w0 + %lhs:_(s64) = COPY $x0 + %rhs:_(s64) = COPY $x0 + %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs + %small:_(s32) = G_TRUNC %res(s64) + $w0 = COPY %small(s32) +... +--- +name: test_combine_zext_select +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_zext_select + ; CHECK-PRE: %cond:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %lhs(s32) + ; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %rhs(s32) + ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ZEXT]], [[ZEXT1]] + ; CHECK-PRE-NEXT: $x0 = COPY %big(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_zext_select + ; CHECK-POST: %cond:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs + ; CHECK-POST-NEXT: %big:_(s64) = G_ZEXT %res(s32) + ; CHECK-POST-NEXT: $x0 = COPY %big(s64) + %cond:_(s32) = COPY $w0 + %lhs:_(s32) = COPY $w0 + %rhs:_(s32) = COPY $w0 + %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs + %big:_(s64) = G_ZEXT %res(s32) + $x0 = COPY %big(s64) +... +--- +name: test_combine_anyzext_select +legalized: true +body: | + bb.1: + ; CHECK-PRE-LABEL: name: test_combine_anyzext_select + ; CHECK-PRE: %cond:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs(s32) + ; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs(s32) + ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ANYEXT]], [[ANYEXT1]] + ; CHECK-PRE-NEXT: $x0 = COPY %big(s64) + ; + ; CHECK-POST-LABEL: name: test_combine_anyzext_select + ; CHECK-POST: %cond:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs + ; CHECK-POST-NEXT: %big:_(s64) = G_ANYEXT %res(s32) + ; CHECK-POST-NEXT: $x0 = COPY %big(s64) + %cond:_(s32) = COPY $w0 + %lhs:_(s32) = COPY $w0 + %rhs:_(s32) = COPY $w0 + %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs + %big:_(s64) = G_ANYEXT %res(s32) + $x0 = COPY %big(s64) +... +--- +name: test_combine_anyzext_select_multi_use +legalized: true +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_anyzext_select_multi_use + ; CHECK: %cond:_(s32) = COPY $w0 + ; CHECK-NEXT: %lhs:_(s32) = COPY $w0 + ; CHECK-NEXT: %rhs:_(s32) = COPY $w0 + ; CHECK-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs + ; CHECK-NEXT: %big:_(s64) = G_ANYEXT %res(s32) + ; CHECK-NEXT: $x0 = COPY %big(s64) + ; CHECK-NEXT: $w0 = COPY %res(s32) + %cond:_(s32) = COPY $w0 + %lhs:_(s32) = COPY $w0 + %rhs:_(s32) = COPY $w0 + %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs + %big:_(s64) = G_ANYEXT %res(s32) + $x0 = COPY %big(s64) + $w0 = COPY %res(s32) +... +--- +name: test_combine_trunc_select_vector_out_of_budget +legalized: true +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_trunc_select_vector_out_of_budget + ; CHECK: %cond:_(<2 x s32>) = COPY $x0 + ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: %arg2:_(s64) = COPY $x0 + ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + ; CHECK-NEXT: %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64) + ; CHECK-NEXT: %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2 + ; CHECK-NEXT: %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>) + ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>) + %cond:_(<2 x s32>) = COPY $x0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x0 + %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64) + %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2 + %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>) + $x0 = COPY %small(<2 x s32>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll index ec832ed..63f5464 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll @@ -1845,39 +1845,37 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) { ; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3 ; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 ; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GCN-NEXT: s_ashr_i32 s8, s5, 31 +; GCN-NEXT: s_ashr_i32 s7, s5, 31 ; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10 ; GCN-NEXT: s_cmp_lg_u32 s11, 0 ; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GCN-NEXT: s_cmp_lg_u32 s12, 0 -; GCN-NEXT: s_mov_b32 s9, s8 ; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] ; GCN-NEXT: s_cmp_lg_u32 s11, 0 -; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9] +; GCN-NEXT: s_cselect_b32 s2, s6, s7 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_ashr_i65: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000 -; GFX10PLUS-NEXT: s_sub_i32 s12, s3, 64 -; GFX10PLUS-NEXT: s_sub_i32 s8, 64, s3 +; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64 +; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3 ; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64 -; GFX10PLUS-NEXT: s_cselect_b32 s13, 1, 0 +; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0 ; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0 -; GFX10PLUS-NEXT: s_cselect_b32 s14, 1, 0 -; GFX10PLUS-NEXT: s_ashr_i64 s[6:7], s[4:5], s3 -; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], s3 -; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 -; GFX10PLUS-NEXT: s_ashr_i32 s10, s5, 31 -; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] -; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s12 -; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0 -; GFX10PLUS-NEXT: s_mov_b32 s11, s10 -; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] -; GFX10PLUS-NEXT: s_cmp_lg_u32 s14, 0 -; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] -; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0 -; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11] +; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0 +; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3 +; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2 +; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[4:5], s3 +; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX10PLUS-NEXT: s_ashr_i32 s3, s5, 31 +; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s10 +; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0 +; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] +; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0 +; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] +; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0 +; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3 ; GFX10PLUS-NEXT: ; return to shader part epilog %result = ashr i65 %value, %amount ret i65 %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll index 980ba3d..5dd4fa0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll @@ -1766,7 +1766,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) { ; GCN-NEXT: s_cmp_lg_u32 s12, 0 ; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] ; GCN-NEXT: s_cmp_lg_u32 s11, 0 -; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 +; GCN-NEXT: s_cselect_b32 s2, s6, 0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_lshr_i65: @@ -1788,7 +1788,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) { ; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0 ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] ; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0 -; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 +; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0 ; GFX10PLUS-NEXT: ; return to shader part epilog %result = lshr i65 %value, %amount ret i65 %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll index c2f911c..4cf1c92 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll @@ -1733,9 +1733,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) { ; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10 ; GCN-NEXT: s_cmp_lg_u32 s11, 0 ; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0 -; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9] +; GCN-NEXT: s_cselect_b32 s3, s6, s8 ; GCN-NEXT: s_cmp_lg_u32 s12, 0 -; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] +; GCN-NEXT: s_cselect_b32 s2, s2, s3 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_i65: @@ -1753,9 +1753,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) { ; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10 ; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0 ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0 -; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] +; GFX10PLUS-NEXT: s_cselect_b32 s3, s4, s6 ; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0 -; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] +; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3 ; GFX10PLUS-NEXT: ; return to shader part epilog %result = shl i65 %value, %amount ret i65 %result diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll index ba0a1e7..a0b5497 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll @@ -1593,7 +1593,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias % ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 ; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[4:5] ; GFX10-GISEL-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll index a55c8cd..2168e7f 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -1706,11 +1706,12 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0 -; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[0:1], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[0:1] +; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v0 +; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc ; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[4:5] ; GFX9-GISEL-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll index 57fe6cd..14e6c4b 100644 --- a/llvm/test/CodeGen/AMDGPU/cttz.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz.ll @@ -1359,7 +1359,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias % ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s0, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, s0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, s0 ; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[4:5] ; GFX10-GISEL-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() |