diff options
author | Craig Topper <craig.topper@sifive.com> | 2024-08-14 08:44:57 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-14 08:44:57 -0700 |
commit | abc1acf8df3b212a03650c314b7832b3aa7ccd42 (patch) | |
tree | 6d1dc476d9b30e41aacde9c6559f6665b11f9c63 | |
parent | 294ed6a1eb090627441ee0426a64d8f71985fce3 (diff) | |
download | llvm-abc1acf8df3b212a03650c314b7832b3aa7ccd42.zip llvm-abc1acf8df3b212a03650c314b7832b3aa7ccd42.tar.gz llvm-abc1acf8df3b212a03650c314b7832b3aa7ccd42.tar.bz2 |
[TargetLowering][AMDGPU][ARM][RISCV][X86] Teach SimplifyDemandedBits to combine (srl (sra X, C1), ShAmt) -> sra(X, C1+ShAmt) (#101751)
This is only valid if the upper ShAmt bits of the srl result aren't demanded.
This helps with cases where the outer srl was originally an sra and was
converted to a srl by SimplifyDemandedBits before it had a chance to
combine with the inner sra. This can occur when the inner sra was part
of a sign_extend_inreg expansion.
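There are some regressions in ARM and Thumb2: where sbfx/ubfx are available, the previous two-instruction sbfx+ubfx sequence becomes a three-instruction lsl/mov/and (with asr #31 shifted operand) sequence.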
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/permute_i8.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/div.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rv64zba.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll | 6 |
12 files changed, 74 insertions, 79 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c4f4261..b5bca59 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1958,6 +1958,22 @@ bool TargetLowering::SimplifyDemandedBits( } } + // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a + // single sra. We can do this if the top bits are never demanded. + if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) { + if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { + if (std::optional<uint64_t> InnerSA = + TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { + unsigned C1 = *InnerSA; + // Clamp the combined shift amount if it exceeds the bit width. + unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1); + SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT, + Op0.getOperand(0), NewSA)); + } + } + } + APInt InDemandedMask = (DemandedBits << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll index 595991e..9fbce05e 100644 --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -41,8 +41,8 @@ define i1 @test_srem_even(i4 %X) nounwind { define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; CHECK-LABEL: test_srem_pow2_setne: ; CHECK: // %bb.0: -; CHECK-NEXT: sbfx w8, w0, #0, #6 -; CHECK-NEXT: ubfx w8, w8, #9, #2 +; CHECK-NEXT: sbfx w8, w0, #5, #1 +; CHECK-NEXT: and w8, w8, #0x3 ; CHECK-NEXT: add w8, w0, w8 ; CHECK-NEXT: and w8, w8, #0x3c ; CHECK-NEXT: sub w8, w0, w8 diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll index bf98af3..050300a 100644 --- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll +++ 
b/llvm/test/CodeGen/AMDGPU/permute_i8.ll @@ -1049,15 +1049,14 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_bfe_i32 v1, v9, 0, 8 -; GFX10-NEXT: v_ashrrev_i32_e32 v3, 24, v9 ; GFX10-NEXT: v_ashrrev_i32_sdwa v2, v2, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_ashrrev_i32_e32 v3, 25, v9 ; GFX10-NEXT: v_lshlrev_b16 v1, 7, v1 -; GFX10-NEXT: v_lshrrev_b16 v3, 1, v3 +; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_ashrrev_i16 v4, 10, v0 ; GFX10-NEXT: v_perm_b32 v0, v9, v0, 0x4010707 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff00, v1 -; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: global_store_dword v[5:6], v1, off @@ -1075,23 +1074,22 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: global_load_dword v4, v[0:1], off ; GFX9-NEXT: global_load_dword v9, v[2:3], off -; GFX9-NEXT: v_mov_b32_e32 v0, 26 -; GFX9-NEXT: v_mov_b32_e32 v1, 1 -; GFX9-NEXT: v_mov_b32_e32 v2, 7 +; GFX9-NEXT: v_mov_b32_e32 v1, 7 ; GFX9-NEXT: s_mov_b32 s4, 0x4010707 +; GFX9-NEXT: v_mov_b32_e32 v0, 26 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX9-NEXT: v_lshlrev_b16_sdwa v2, v2, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_perm_b32 v3, v4, v9, s4 +; GFX9-NEXT: v_perm_b32 v2, v4, v9, s4 +; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_ashrrev_i32_e32 v3, 25, v4 ; GFX9-NEXT: v_ashrrev_i16_e32 v9, 10, v9 -; GFX9-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v2 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v1 +; GFX9-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v1, v9, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-NEXT: global_store_dword v[5:6], v0, off -; GFX9-NEXT: global_store_dword v[7:8], v3, off +; GFX9-NEXT: global_store_dword v[7:8], v2, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll index 126b17e..2efe27d 100644 --- a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll @@ -43,8 +43,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; CHECK-LABEL: test_srem_pow2_setne: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_bfe_i32 v1, v0, 0, 6 -; CHECK-NEXT: v_bfe_u32 v1, v1, 9, 2 +; CHECK-NEXT: v_bfe_i32 v1, v0, 5, 1 +; CHECK-NEXT: v_and_b32_e32 v1, 3, v1 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v0, v1 ; CHECK-NEXT: v_and_b32_e32 v1, 60, v1 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 diff --git 
a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll index a4e081d..7f56215 100644 --- a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll @@ -209,8 +209,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; ARM5: @ %bb.0: ; ARM5-NEXT: lsl r1, r0, #26 ; ARM5-NEXT: mov r2, #3 -; ARM5-NEXT: asr r1, r1, #26 -; ARM5-NEXT: and r1, r2, r1, lsr #9 +; ARM5-NEXT: and r1, r2, r1, asr #31 ; ARM5-NEXT: add r1, r0, r1 ; ARM5-NEXT: and r1, r1, #60 ; ARM5-NEXT: sub r0, r0, r1 @@ -222,8 +221,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; ARM6: @ %bb.0: ; ARM6-NEXT: lsl r1, r0, #26 ; ARM6-NEXT: mov r2, #3 -; ARM6-NEXT: asr r1, r1, #26 -; ARM6-NEXT: and r1, r2, r1, lsr #9 +; ARM6-NEXT: and r1, r2, r1, asr #31 ; ARM6-NEXT: add r1, r0, r1 ; ARM6-NEXT: and r1, r1, #60 ; ARM6-NEXT: sub r0, r0, r1 @@ -233,8 +231,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; ; ARM7-LABEL: test_srem_pow2_setne: ; ARM7: @ %bb.0: -; ARM7-NEXT: sbfx r1, r0, #0, #6 -; ARM7-NEXT: ubfx r1, r1, #9, #2 +; ARM7-NEXT: lsl r1, r0, #26 +; ARM7-NEXT: mov r2, #3 +; ARM7-NEXT: and r1, r2, r1, asr #31 ; ARM7-NEXT: add r1, r0, r1 ; ARM7-NEXT: and r1, r1, #60 ; ARM7-NEXT: sub r0, r0, r1 @@ -244,8 +243,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; ; ARM8-LABEL: test_srem_pow2_setne: ; ARM8: @ %bb.0: -; ARM8-NEXT: sbfx r1, r0, #0, #6 -; ARM8-NEXT: ubfx r1, r1, #9, #2 +; ARM8-NEXT: lsl r1, r0, #26 +; ARM8-NEXT: mov r2, #3 +; ARM8-NEXT: and r1, r2, r1, asr #31 ; ARM8-NEXT: add r1, r0, r1 ; ARM8-NEXT: and r1, r1, #60 ; ARM8-NEXT: sub r0, r0, r1 @@ -255,8 +255,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; ; NEON7-LABEL: test_srem_pow2_setne: ; NEON7: @ %bb.0: -; NEON7-NEXT: sbfx r1, r0, #0, #6 -; NEON7-NEXT: ubfx r1, r1, #9, #2 +; NEON7-NEXT: lsl r1, r0, #26 +; NEON7-NEXT: mov r2, #3 +; NEON7-NEXT: and r1, r2, r1, asr #31 ; NEON7-NEXT: add r1, r0, r1 ; NEON7-NEXT: and r1, r1, #60 
; NEON7-NEXT: sub r0, r0, r1 @@ -266,8 +267,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; ; NEON8-LABEL: test_srem_pow2_setne: ; NEON8: @ %bb.0: -; NEON8-NEXT: sbfx r1, r0, #0, #6 -; NEON8-NEXT: ubfx r1, r1, #9, #2 +; NEON8-NEXT: lsl r1, r0, #26 +; NEON8-NEXT: mov r2, #3 +; NEON8-NEXT: and r1, r2, r1, asr #31 ; NEON8-NEXT: add r1, r0, r1 ; NEON8-NEXT: and r1, r1, #60 ; NEON8-NEXT: sub r0, r0, r1 diff --git a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll index 1a9fa27..37cca86 100644 --- a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll @@ -90,8 +90,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; MIPSEL-LABEL: test_srem_pow2_setne: ; MIPSEL: # %bb.0: ; MIPSEL-NEXT: sll $1, $4, 26 -; MIPSEL-NEXT: sra $1, $1, 26 -; MIPSEL-NEXT: srl $1, $1, 9 +; MIPSEL-NEXT: sra $1, $1, 31 ; MIPSEL-NEXT: andi $1, $1, 3 ; MIPSEL-NEXT: addu $1, $4, $1 ; MIPSEL-NEXT: andi $1, $1, 60 @@ -104,8 +103,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; MIPS64EL: # %bb.0: ; MIPS64EL-NEXT: sll $1, $4, 0 ; MIPS64EL-NEXT: sll $2, $1, 26 -; MIPS64EL-NEXT: sra $2, $2, 26 -; MIPS64EL-NEXT: srl $2, $2, 9 +; MIPS64EL-NEXT: sra $2, $2, 31 ; MIPS64EL-NEXT: andi $2, $2, 3 ; MIPS64EL-NEXT: addu $2, $1, $2 ; MIPS64EL-NEXT: andi $2, $2, 60 diff --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll index b0cc89d..2b07f27 100644 --- a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll @@ -85,8 +85,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; PPC-LABEL: test_srem_pow2_setne: ; PPC: # %bb.0: ; PPC-NEXT: slwi 4, 3, 26 -; PPC-NEXT: srawi 4, 4, 26 -; PPC-NEXT: rlwinm 4, 4, 23, 30, 31 +; PPC-NEXT: srawi 4, 4, 31 +; PPC-NEXT: clrlwi 4, 4, 30 ; PPC-NEXT: add 4, 3, 4 ; PPC-NEXT: rlwinm 4, 4, 0, 26, 29 ; PPC-NEXT: sub 3, 3, 4 @@ -99,8 +99,8 
@@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; PPC64LE-LABEL: test_srem_pow2_setne: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: slwi 4, 3, 26 -; PPC64LE-NEXT: srawi 4, 4, 26 -; PPC64LE-NEXT: rlwinm 4, 4, 23, 30, 31 +; PPC64LE-NEXT: srawi 4, 4, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 30 ; PPC64LE-NEXT: add 4, 3, 4 ; PPC64LE-NEXT: rlwinm 4, 4, 0, 26, 29 ; PPC64LE-NEXT: sub 3, 3, 4 diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll index 99c83b9..f4e6769 100644 --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -1017,8 +1017,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind { ; RV32I-LABEL: sdiv8_pow2: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: slli a1, a1, 17 +; RV32I-NEXT: srai a1, a1, 2 ; RV32I-NEXT: srli a1, a1, 29 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: slli a0, a0, 24 @@ -1028,8 +1027,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind { ; RV32IM-LABEL: sdiv8_pow2: ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a1, a0, 24 -; RV32IM-NEXT: srai a1, a1, 24 -; RV32IM-NEXT: slli a1, a1, 17 +; RV32IM-NEXT: srai a1, a1, 2 ; RV32IM-NEXT: srli a1, a1, 29 ; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: slli a0, a0, 24 @@ -1039,8 +1037,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind { ; RV64I-LABEL: sdiv8_pow2: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: slli a1, a1, 49 +; RV64I-NEXT: srai a1, a1, 2 ; RV64I-NEXT: srli a1, a1, 61 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 56 @@ -1050,8 +1047,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind { ; RV64IM-LABEL: sdiv8_pow2: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 56 -; RV64IM-NEXT: srai a1, a1, 56 -; RV64IM-NEXT: slli a1, a1, 49 +; RV64IM-NEXT: srai a1, a1, 2 ; RV64IM-NEXT: srli a1, a1, 61 ; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: slli a0, a0, 56 @@ -1209,8 +1205,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind { ; RV32I-LABEL: sdiv16_pow2: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 16 -; 
RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: srai a1, a1, 2 ; RV32I-NEXT: srli a1, a1, 29 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: slli a0, a0, 16 @@ -1220,8 +1215,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind { ; RV32IM-LABEL: sdiv16_pow2: ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a1, a0, 16 -; RV32IM-NEXT: srai a1, a1, 16 -; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: srai a1, a1, 2 ; RV32IM-NEXT: srli a1, a1, 29 ; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: slli a0, a0, 16 @@ -1231,8 +1225,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind { ; RV64I-LABEL: sdiv16_pow2: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: slli a1, a1, 33 +; RV64I-NEXT: srai a1, a1, 2 ; RV64I-NEXT: srli a1, a1, 61 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 48 @@ -1242,8 +1235,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind { ; RV64IM-LABEL: sdiv16_pow2: ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a1, a0, 48 -; RV64IM-NEXT: srai a1, a1, 48 -; RV64IM-NEXT: slli a1, a1, 33 +; RV64IM-NEXT: srai a1, a1, 2 ; RV64IM-NEXT: srli a1, a1, 61 ; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: slli a0, a0, 48 diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 20a0484..87796e2 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -1555,16 +1555,14 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind { ; RV64I-LABEL: sext_ashr_zext_i8: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: slli a0, a0, 23 +; RV64I-NEXT: srai a0, a0, 31 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret ; ; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8: ; RV64ZBANOZBB: # %bb.0: ; RV64ZBANOZBB-NEXT: slli a0, a0, 56 -; RV64ZBANOZBB-NEXT: srai a0, a0, 56 -; RV64ZBANOZBB-NEXT: slli a0, a0, 23 +; RV64ZBANOZBB-NEXT: srai a0, a0, 31 ; RV64ZBANOZBB-NEXT: srli a0, a0, 32 ; RV64ZBANOZBB-NEXT: ret ; @@ -1674,16 +1672,14 @@ define zeroext i32 
@sext_ashr_zext_i16(i16 %a) nounwind { ; RV64I-LABEL: sext_ashr_zext_i16: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: slli a0, a0, 23 +; RV64I-NEXT: srai a0, a0, 25 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: ret ; ; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16: ; RV64ZBANOZBB: # %bb.0: ; RV64ZBANOZBB-NEXT: slli a0, a0, 48 -; RV64ZBANOZBB-NEXT: srai a0, a0, 48 -; RV64ZBANOZBB-NEXT: slli a0, a0, 23 +; RV64ZBANOZBB-NEXT: srai a0, a0, 25 ; RV64ZBANOZBB-NEXT: srli a0, a0, 32 ; RV64ZBANOZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 457d038..dc27158 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -222,8 +222,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV32-LABEL: test_srem_pow2_setne: ; RV32: # %bb.0: ; RV32-NEXT: slli a1, a0, 26 -; RV32-NEXT: srai a1, a1, 26 -; RV32-NEXT: slli a1, a1, 21 +; RV32-NEXT: srai a1, a1, 1 ; RV32-NEXT: srli a1, a1, 30 ; RV32-NEXT: add a1, a0, a1 ; RV32-NEXT: andi a1, a1, 60 @@ -235,8 +234,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV64-LABEL: test_srem_pow2_setne: ; RV64: # %bb.0: ; RV64-NEXT: slli a1, a0, 58 -; RV64-NEXT: srai a1, a1, 58 -; RV64-NEXT: slli a1, a1, 53 +; RV64-NEXT: srai a1, a1, 1 ; RV64-NEXT: srli a1, a1, 62 ; RV64-NEXT: add a1, a0, a1 ; RV64-NEXT: andi a1, a1, 60 @@ -248,8 +246,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV32M-LABEL: test_srem_pow2_setne: ; RV32M: # %bb.0: ; RV32M-NEXT: slli a1, a0, 26 -; RV32M-NEXT: srai a1, a1, 26 -; RV32M-NEXT: slli a1, a1, 21 +; RV32M-NEXT: srai a1, a1, 1 ; RV32M-NEXT: srli a1, a1, 30 ; RV32M-NEXT: add a1, a0, a1 ; RV32M-NEXT: andi a1, a1, 60 @@ -261,8 +258,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV64M-LABEL: test_srem_pow2_setne: ; RV64M: # %bb.0: ; RV64M-NEXT: slli a1, a0, 58 -; RV64M-NEXT: srai a1, a1, 58 -; 
RV64M-NEXT: slli a1, a1, 53 +; RV64M-NEXT: srai a1, a1, 1 ; RV64M-NEXT: srli a1, a1, 62 ; RV64M-NEXT: add a1, a0, a1 ; RV64M-NEXT: andi a1, a1, 60 @@ -274,8 +270,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV32MV-LABEL: test_srem_pow2_setne: ; RV32MV: # %bb.0: ; RV32MV-NEXT: slli a1, a0, 26 -; RV32MV-NEXT: srai a1, a1, 26 -; RV32MV-NEXT: slli a1, a1, 21 +; RV32MV-NEXT: srai a1, a1, 1 ; RV32MV-NEXT: srli a1, a1, 30 ; RV32MV-NEXT: add a1, a0, a1 ; RV32MV-NEXT: andi a1, a1, 60 @@ -287,8 +282,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; RV64MV-LABEL: test_srem_pow2_setne: ; RV64MV: # %bb.0: ; RV64MV-NEXT: slli a1, a0, 58 -; RV64MV-NEXT: srai a1, a1, 58 -; RV64MV-NEXT: slli a1, a1, 53 +; RV64MV-NEXT: srai a1, a1, 1 ; RV64MV-NEXT: srli a1, a1, 62 ; RV64MV-NEXT: add a1, a0, a1 ; RV64MV-NEXT: andi a1, a1, 60 diff --git a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll index 58bafeb..e3d65a3 100644 --- a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll @@ -44,8 +44,9 @@ define i1 @test_srem_even(i4 %X) nounwind { define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; CHECK-LABEL: test_srem_pow2_setne: ; CHECK: @ %bb.0: -; CHECK-NEXT: sbfx r1, r0, #0, #6 -; CHECK-NEXT: ubfx r1, r1, #9, #2 +; CHECK-NEXT: lsls r1, r0, #26 +; CHECK-NEXT: movs r2, #3 +; CHECK-NEXT: and.w r1, r2, r1, asr #31 ; CHECK-NEXT: add r1, r0 ; CHECK-NEXT: and r1, r1, #60 ; CHECK-NEXT: subs r0, r0, r1 diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll index d644ed8..cc4bda8 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll @@ -82,8 +82,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shlb $2, %cl -; X86-NEXT: sarb $5, %cl -; X86-NEXT: shrb $4, %cl 
+; X86-NEXT: sarb $7, %cl ; X86-NEXT: andb $3, %cl ; X86-NEXT: addb %al, %cl ; X86-NEXT: andb $60, %cl @@ -96,8 +95,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; X64: # %bb.0: ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: leal (,%rdi,4), %eax -; X64-NEXT: sarb $5, %al -; X64-NEXT: shrb $4, %al +; X64-NEXT: sarb $7, %al ; X64-NEXT: andb $3, %al ; X64-NEXT: addb %dil, %al ; X64-NEXT: andb $60, %al |