diff options
author | Aaditya <Aaditya.AlokDeshpande@amd.com> | 2025-09-10 14:41:18 +0530 |
---|---|---|
committer | Aaditya <Aaditya.AlokDeshpande@amd.com> | 2025-09-10 18:05:39 +0530 |
commit | 6497e4f1e2882d583404586d43d553138ecf2968 (patch) | |
tree | b65094ad3f05dc30d677f462704696d462162a23 | |
parent | eb9b409144edacf5283359db19a9c730f9c71688 (diff) | |
download | llvm-users/easyonaadit/amdgpu/wave-reduce-intrinsics-arithmetic.zip llvm-users/easyonaadit/amdgpu/wave-reduce-intrinsics-arithmetic.tar.gz llvm-users/easyonaadit/amdgpu/wave-reduce-intrinsics-arithmetic.tar.bz2 |
removing unused variableusers/easyonaadit/amdgpu/wave-reduce-intrinsics-arithmetic
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll | 188 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll | 188 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 296 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 172 |
5 files changed, 0 insertions, 853 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 8e02177..4bb189e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5407,15 +5407,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI, RetBB = &BB; break; } - case AMDGPU::V_CMP_LT_U64_e64: // umin - case AMDGPU::V_CMP_LT_I64_e64: // min - case AMDGPU::V_CMP_GT_U64_e64: // umax - case AMDGPU::V_CMP_GT_I64_e64: { // max - // Idempotent operations. - BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg); - RetBB = &BB; - break; - } case AMDGPU::S_XOR_B32: case AMDGPU::S_ADD_I32: case AMDGPU::S_ADD_U64_PSEUDO: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll index f381a82..ace65a0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll @@ -1269,21 +1269,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1306,21 +1294,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-LABEL: divergent_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: s_brev_b32 s5, 1 -; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: s_brev_b32 s5, 1 -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 ; GFX8GISEL-NEXT: s_brev_b32 s5, 1 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1343,21 +1319,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-LABEL: divergent_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1380,21 +1344,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-LABEL: divergent_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: s_brev_b32 s5, 1 -; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: s_brev_b32 s5, 1 -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 ; GFX9GISEL-NEXT: s_brev_b32 s5, 1 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1417,21 +1369,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-LABEL: divergent_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1453,21 +1393,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-LABEL: divergent_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064GISEL-NEXT: s_brev_b32 s5, 1 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: s_brev_b32 s5, 1 -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: s_brev_b32 s5, 1 -; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1489,21 +1417,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-LABEL: divergent_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -======= -; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1525,21 +1441,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-LABEL: divergent_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1032GISEL-NEXT: s_brev_b32 s5, 1 -; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -======= -; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: s_brev_b32 s5, 1 -; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032GISEL-NEXT: s_brev_b32 s5, 1 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1561,39 +1465,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-LABEL: divergent_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 -; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -======= -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1 ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s8 ; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v3, s8 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5] ; GFX1164DAGISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 @@ -1609,39 +1490,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-LABEL: divergent_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1164GISEL-NEXT: s_brev_b32 s1, 1 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -======= -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_brev_b32 s1, 1 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164GISEL-NEXT: s_brev_b32 s1, 1 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1 ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s8 ; GFX1164GISEL-NEXT: v_readlane_b32 s5, v3, s8 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5] ; GFX1164GISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 @@ -1657,38 +1515,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-LABEL: divergent_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -======= -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5] ; GFX1132DAGISEL-NEXT: s_and_b32 s6, vcc_lo, s2 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 @@ -1703,38 +1538,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-LABEL: divergent_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: s_brev_b32 s1, 1 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -======= -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_brev_b32 s1, 1 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132GISEL-NEXT: s_brev_b32 s1, 1 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5] ; GFX1132GISEL-NEXT: s_and_b32 s6, vcc_lo, s2 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll index 7a83d7f..b12537e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll @@ -1269,21 +1269,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX8DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX8DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1306,21 +1294,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-LABEL: divergent_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX8GISEL-NEXT: s_mov_b32 s4, -1 -; GFX8GISEL-NEXT: s_brev_b32 s5, -2 -; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: s_brev_b32 s5, -2 -; GFX8GISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 ; GFX8GISEL-NEXT: s_brev_b32 s5, -2 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1343,21 +1319,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-LABEL: divergent_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX9DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX9DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1380,21 +1344,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-LABEL: divergent_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX9GISEL-NEXT: s_mov_b32 s4, -1 -; GFX9GISEL-NEXT: s_brev_b32 s5, -2 -; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: s_brev_b32 s5, -2 -; GFX9GISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 ; GFX9GISEL-NEXT: s_brev_b32 s5, -2 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1417,21 +1369,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-LABEL: divergent_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1453,21 +1393,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-LABEL: divergent_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064GISEL-NEXT: s_brev_b32 s5, -2 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: s_brev_b32 s5, -2 -; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064GISEL-NEXT: s_brev_b32 s5, -2 -; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1489,21 +1417,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-LABEL: divergent_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1032DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -======= -; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: s_brev_b32 s5, -2 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1525,21 +1441,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-LABEL: divergent_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1032GISEL-NEXT: s_brev_b32 s5, -2 -; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -======= -; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: s_brev_b32 s5, -2 -; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032GISEL-NEXT: s_brev_b32 s5, -2 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1561,39 +1465,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-LABEL: divergent_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1 -; GFX1164DAGISEL-NEXT: s_brev_b32 s1, -2 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -======= -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_brev_b32 s1, -2 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164DAGISEL-NEXT: s_brev_b32 s1, -2 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1 ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s8 ; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v3, s8 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[4:5] ; GFX1164DAGISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 @@ -1609,39 +1490,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-LABEL: divergent_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1164GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1164GISEL-NEXT: s_brev_b32 s1, -2 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -======= -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_brev_b32 s1, -2 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164GISEL-NEXT: s_brev_b32 s1, -2 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1 ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s8 ; GFX1164GISEL-NEXT: v_readlane_b32 s5, v3, s8 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[4:5] ; GFX1164GISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 @@ -1657,38 +1515,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-LABEL: divergent_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -======= -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[4:5] ; GFX1132DAGISEL-NEXT: s_and_b32 s6, vcc_lo, s2 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 @@ -1703,38 +1538,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-LABEL: divergent_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132GISEL-NEXT: s_brev_b32 s1, -2 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -======= -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_brev_b32 s1, -2 -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132GISEL-NEXT: s_brev_b32 s1, -2 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[4:5] ; GFX1132GISEL-NEXT: s_and_b32 s6, vcc_lo, s2 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll index 3fc5bb1..1f848d5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll @@ -1194,8 +1194,6 @@ entry: ret void } -<<<<<<< HEAD -<<<<<<< HEAD define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1476,300 +1474,6 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off ; GFX1132GISEL-NEXT: s_setpc_b64 s[30:31] -======= -define amdgpu_kernel void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { -======= -define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) -; GFX8DAGISEL-LABEL: divergent_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s12 -; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s12 -; GFX8DAGISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[4:5] -; GFX8DAGISEL-NEXT: s_and_b64 s[10:11], vcc, s[6:7] -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 -; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX8DAGISEL-NEXT: ; %bb.2: -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) -; GFX8DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX8GISEL-LABEL: divergent_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] -; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s12 -; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s12 -; GFX8GISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[4:5] -; GFX8GISEL-NEXT: s_and_b64 s[10:11], vcc, s[6:7] -; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 -; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX8GISEL-NEXT: ; %bb.2: -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX8GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9DAGISEL-LABEL: divergent_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s12 -; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s12 -; GFX9DAGISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[4:5] -; GFX9DAGISEL-NEXT: s_and_b64 s[10:11], vcc, s[6:7] -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 -; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX9DAGISEL-NEXT: ; %bb.2: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9DAGISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off -; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) -; GFX9DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9GISEL-LABEL: divergent_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] -; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s12 -; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s12 -; GFX9GISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[4:5] -; GFX9GISEL-NEXT: s_and_b64 s[10:11], vcc, s[6:7] -; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 -; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX9GISEL-NEXT: ; %bb.2: -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9GISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off -; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX9GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1064DAGISEL-LABEL: divergent_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s12 -; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s12 -; GFX1064DAGISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[4:5] -; GFX1064DAGISEL-NEXT: s_and_b64 s[10:11], vcc, s[6:7] -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 -; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1064DAGISEL-NEXT: ; %bb.2: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off -; GFX1064DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1064GISEL-LABEL: divergent_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] -; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s12 -; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s12 -; GFX1064GISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[4:5] -; GFX1064GISEL-NEXT: s_and_b64 s[10:11], vcc, s[6:7] -; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 -; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1064GISEL-NEXT: ; %bb.2: -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX1064GISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off -; GFX1064GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1032DAGISEL-LABEL: divergent_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 -; GFX1032DAGISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[8:9], v[4:5] -; GFX1032DAGISEL-NEXT: s_and_b32 s10, vcc_lo, s6 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 -; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1032DAGISEL-NEXT: ; %bb.2: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off -; GFX1032DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1032GISEL-LABEL: divergent_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0 -; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 -; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 -; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 -; GFX1032GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[8:9], v[4:5] -; GFX1032GISEL-NEXT: s_and_b32 s10, vcc_lo, s6 -; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 -; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1032GISEL-NEXT: ; %bb.2: -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 -; GFX1032GISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off -; GFX1032GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1164DAGISEL-LABEL: divergent_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1 -; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s8 -; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v3, s8 -; GFX1164DAGISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[4:5] -; GFX1164DAGISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 -; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1164DAGISEL-NEXT: ; %bb.2: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX1164DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off -; GFX1164DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1164GISEL-LABEL: divergent_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] -; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1 -; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s8 -; GFX1164GISEL-NEXT: v_readlane_b32 s5, v3, s8 -; GFX1164GISEL-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[4:5] -; GFX1164GISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 -; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1164GISEL-NEXT: ; %bb.2: -; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX1164GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off -; GFX1164GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1132DAGISEL-LABEL: divergent_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 -; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 -; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3 -; GFX1132DAGISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[4:5] -; GFX1132DAGISEL-NEXT: s_and_b32 s6, vcc_lo, s2 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1132DAGISEL-NEXT: ; %bb.2: -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off -; GFX1132DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX1132GISEL-LABEL: divergent_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -<<<<<<< HEAD -; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 -; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 -; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3 -; GFX1132GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[4:5] -; GFX1132GISEL-NEXT: s_and_b32 s6, vcc_lo, s2 -; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 -; GFX1132GISEL-NEXT: ; %bb.2: -; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off -; GFX1132GISEL-NEXT: s_setpc_b64 s[30:31] ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) entry: %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 %id.x, i32 1) store i64 %result, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll index 29412bc..c2cfb88 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll @@ -1198,19 +1198,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1233,19 +1222,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-LABEL: divergent_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX8GISEL-NEXT: s_mov_b32 s4, -1 -; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1268,19 +1246,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-LABEL: divergent_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1303,19 +1270,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-LABEL: divergent_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX9GISEL-NEXT: s_mov_b32 s4, -1 -; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1338,19 +1294,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-LABEL: divergent_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1372,19 +1317,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-LABEL: divergent_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -======= -; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1406,19 +1340,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-LABEL: divergent_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -======= -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1440,19 +1363,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-LABEL: divergent_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], -1 -; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -======= -; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s5, s4 ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 @@ -1474,36 +1386,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-LABEL: divergent_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -======= -; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1 -======= -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], -1 ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1 ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s8 ; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v3, s8 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164DAGISEL-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[4:5] ; GFX1164DAGISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 @@ -1519,36 +1410,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-LABEL: divergent_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], -1 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -======= -; GFX1164GISEL-NEXT: s_mov_b32 s0, -1 -======= ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], -1 ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1 ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s8 ; GFX1164GISEL-NEXT: v_readlane_b32 s5, v3, s8 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1164GISEL-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[4:5] ; GFX1164GISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3] ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 @@ -1564,35 +1434,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-LABEL: divergent_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD -; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], -1 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -======= -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1 -======= ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], -1 ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132DAGISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[4:5] ; GFX1132DAGISEL-NEXT: s_and_b32 s6, vcc_lo, s2 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 @@ -1607,35 +1456,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-LABEL: divergent_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -<<<<<<< HEAD -<<<<<<< HEAD ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -======= -; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 -======= -; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], -1 ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -<<<<<<< HEAD ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3 -<<<<<<< HEAD -<<<<<<< HEAD -======= -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ->>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1) -======= ->>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.) ; GFX1132GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[4:5] ; GFX1132GISEL-NEXT: s_and_b32 s6, vcc_lo, s2 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 |