diff options
author | Craig Topper <craig.topper@sifive.com> | 2025-08-30 21:25:56 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-30 21:25:56 -0700 |
commit | 4a6435397ba587022ecafa772cd504b8659da2fb (patch) | |
tree | 2125af50b6864d4f9104875e02b919626e36dda0 | |
parent | 1d8fdda7b0a9f47c443600bca6af2bc141e4abf7 (diff) | |
download | llvm-4a6435397ba587022ecafa772cd504b8659da2fb.zip llvm-4a6435397ba587022ecafa772cd504b8659da2fb.tar.gz llvm-4a6435397ba587022ecafa772cd504b8659da2fb.tar.bz2 |
[SelectionDAG] Add computeKnownBits for ISD::ROTL/ROTR. (#156142)
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr67333.ll | 10 |
3 files changed, 28 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 967306a..5c75bc1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3850,6 +3850,22 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false, Op->getFlags().hasExact()); break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { + unsigned Amt = C->getAPIntValue().urem(BitWidth); + + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // Canonicalize to ROTR. + if (Opcode == ISD::ROTL && Amt != 0) + Amt = BitWidth - Amt; + + Known.Zero = Known.Zero.rotr(Amt); + Known.One = Known.One.rotr(Amt); + } + break; case ISD::FSHL: case ISD::FSHR: if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index 9e39809..693d199 100644 --- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -93,10 +93,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: pslld $31, %xmm3 ; SSE2-NEXT: por %xmm0, %xmm3 -; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [715827883,715827883,715827883,715827883] +; SSE2-NEXT: pcmpgtd %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSE4-LABEL: p4_vector_urem_by_const__splat: @@ -104,9 +102,9 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE4-NEXT: psrld $1, %xmm0 -; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882] -; SSE4-NEXT: pminud %xmm0, %xmm1 -; SSE4-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883] +; SSE4-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: retq ; ; AVX2-LABEL: p4_vector_urem_by_const__splat: @@ -116,9 +114,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] ; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882] -; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two diff --git a/llvm/test/CodeGen/X86/pr67333.ll b/llvm/test/CodeGen/X86/pr67333.ll index 9463809..cbb730857 100644 --- a/llvm/test/CodeGen/X86/pr67333.ll +++ b/llvm/test/CodeGen/X86/pr67333.ll @@ -14,12 +14,12 @@ define void @SHA256_Compress_Generic(ptr noundef %ctx) #1 { ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 ; CHECK-NEXT: vpsrld $17, %xmm2, %xmm0 ; CHECK-NEXT: vpslld $15, %xmm2, %xmm3 -; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vpsrld $19, %xmm2, %xmm3 +; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm3 +; CHECK-NEXT: vpsrld $19, %xmm2, %xmm0 ; CHECK-NEXT: vpslld $13, %xmm2, %xmm4 -; CHECK-NEXT: vpor %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm3 -; CHECK-NEXT: vpxor %xmm2, %xmm3, %xmm0 +; CHECK-NEXT: vpor %xmm0, %xmm4, %xmm0 +; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vmovd %ecx, %xmm4 ; CHECK-NEXT: vpshufb %xmm1, %xmm4, %xmm1 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm1 |