aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir131
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/ctlz.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll9
-rw-r--r--llvm/test/CodeGen/AMDGPU/cttz.ll2
7 files changed, 162 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
new file mode 100644
index 0000000..0f43612
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRE
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-POST
+
+---
+name: test_combine_trunc_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_trunc_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %rhs(s64)
+ ; CHECK-PRE-NEXT: %small:_(s32) = G_SELECT %cond(s32), [[TRUNC]], [[TRUNC1]]
+ ; CHECK-PRE-NEXT: $w0 = COPY %small(s32)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_trunc_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %small:_(s32) = G_TRUNC %res(s64)
+ ; CHECK-POST-NEXT: $w0 = COPY %small(s32)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = COPY $x0
+ %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_zext_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_zext_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %lhs(s32)
+ ; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %rhs(s32)
+ ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ZEXT]], [[ZEXT1]]
+ ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_zext_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %big:_(s64) = G_ZEXT %res(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ZEXT %res(s32)
+ $x0 = COPY %big(s64)
+...
+---
+name: test_combine_anyzext_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_anyzext_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs(s32)
+ ; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs(s32)
+ ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_anyzext_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ANYEXT %res(s32)
+ $x0 = COPY %big(s64)
+...
+---
+name: test_combine_anyzext_select_multi_use
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_anyzext_select_multi_use
+ ; CHECK: %cond:_(s32) = COPY $w0
+ ; CHECK-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+ ; CHECK-NEXT: $x0 = COPY %big(s64)
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ANYEXT %res(s32)
+ $x0 = COPY %big(s64)
+ $w0 = COPY %res(s32)
+...
+---
+name: test_combine_trunc_select_vector_out_of_budget
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_select_vector_out_of_budget
+ ; CHECK: %cond:_(<2 x s32>) = COPY $x0
+ ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %arg2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ ; CHECK-NEXT: %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+ ; CHECK-NEXT: %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+ ; CHECK-NEXT: %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+ ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
+ %cond:_(<2 x s32>) = COPY $x0
+ %arg1:_(s64) = COPY $x0
+ %arg2:_(s64) = COPY $x0
+ %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+ %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+ %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+ $x0 = COPY %small(<2 x s32>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index ec832ed..63f5464 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1845,39 +1845,37 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
-; GCN-NEXT: s_ashr_i32 s8, s5, 31
+; GCN-NEXT: s_ashr_i32 s7, s5, 31
; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GCN-NEXT: s_cmp_lg_u32 s12, 0
-; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9]
+; GCN-NEXT: s_cselect_b32 s2, s6, s7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i65:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
-; GFX10PLUS-NEXT: s_sub_i32 s12, s3, 64
-; GFX10PLUS-NEXT: s_sub_i32 s8, 64, s3
+; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
+; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
-; GFX10PLUS-NEXT: s_cselect_b32 s13, 1, 0
+; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
-; GFX10PLUS-NEXT: s_cselect_b32 s14, 1, 0
-; GFX10PLUS-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
-; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
-; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
-; GFX10PLUS-NEXT: s_ashr_i32 s10, s5, 31
-; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
-; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s12
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT: s_mov_b32 s11, s10
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s14, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11]
+; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
+; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
+; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2
+; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[4:5], s3
+; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
+; GFX10PLUS-NEXT: s_ashr_i32 s3, s5, 31
+; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
+; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = ashr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 980ba3d..5dd4fa0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1766,7 +1766,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_cmp_lg_u32 s12, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
+; GCN-NEXT: s_cselect_b32 s2, s6, 0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_lshr_i65:
@@ -1788,7 +1788,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = lshr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index c2f911c..4cf1c92 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1733,9 +1733,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
-; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9]
+; GCN-NEXT: s_cselect_b32 s3, s6, s8
; GCN-NEXT: s_cmp_lg_u32 s12, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GCN-NEXT: s_cselect_b32 s2, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i65:
@@ -1753,9 +1753,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
+; GFX10PLUS-NEXT: s_cselect_b32 s3, s4, s6
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = shl i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index ba0a1e7..a0b5497 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1593,7 +1593,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index a55c8cd..2168e7f 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1706,11 +1706,12 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0
-; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[0:1], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[0:1]
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v0
+; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index 57fe6cd..14e6c4b 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -1359,7 +1359,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s0, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, s0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, s0
; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()