diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-07-03 16:46:49 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-07-03 16:59:52 +0100 |
commit | f60f7b47a92dbf2f3d994868d3cd43b86a18a76c (patch) | |
tree | 300227e597b1c186c98d7e21fe9a1d3b75c16dc1 | |
parent | bf9e9e5e843838f9bc905d4a0d575d8d39e9b5f2 (diff) | |
download | llvm-f60f7b47a92dbf2f3d994868d3cd43b86a18a76c.zip llvm-f60f7b47a92dbf2f3d994868d3cd43b86a18a76c.tar.gz llvm-f60f7b47a92dbf2f3d994868d3cd43b86a18a76c.tar.bz2 |
[InstCombine][X86] Add multiply-by-one handling for MULH/PMULHU/PMULHRS intrinsics
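PMULH/PMULHU multiplied by one simplify to ASHR/ZERO respectively: the high half of the product is just the upper bits of the SEXT/ZEXT of the other operand, i.e. a sign-splat (ashr by 15) for PMULH and zero for PMULHU.
PMULHRS doesn't simplify as much, so I've not attempted to fold it.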
-rw-r--r-- | llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp | 10 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll | 12 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll | 18 |
3 files changed, 22 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 6d4734d..163584b 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -521,6 +521,16 @@ static Value *simplifyX86pmulh(IntrinsicInst &II, if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) return ConstantAggregateZero::get(ResTy); + // Multiply by one. + if (!IsRounding) { + if (match(Arg0, PatternMatch::m_One())) + return IsSigned ? Builder.CreateAShr(Arg1, 15) + : ConstantAggregateZero::get(ResTy); + if (match(Arg1, PatternMatch::m_One())) + return IsSigned ? Builder.CreateAShr(Arg0, 15) + : ConstantAggregateZero::get(ResTy); + } + // Constant folding. if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1)) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll index 699a3c9..185ab46 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll @@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) { define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) @@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x 
i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0) @@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) @@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0) @@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { define <32 x i16> 
@one_pmulh_512(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) @@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, 
i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll index e970ae6..b18833f 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll @@ -111,8 +111,7 @@ define <32 x i16> @zero_pmulhu_512_commute(<32 x i16> %a0) { define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulhu_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) -; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; CHECK-NEXT: ret <8 x i16> zeroinitializer ; %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ret <8 x i16> %1 @@ -120,8 +119,7 @@ define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) { define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulhu_128_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]]) -; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; CHECK-NEXT: ret <8 x i16> zeroinitializer ; %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0) ret <8 x i16> %1 @@ -129,8 +127,7 @@ define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) { define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulhu_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) -; CHECK-NEXT: ret <16 x i16> 
[[TMP1]] +; CHECK-NEXT: ret <16 x i16> zeroinitializer ; %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ret <16 x i16> %1 @@ -138,8 +135,7 @@ define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) { define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulhu_256_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]]) -; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; CHECK-NEXT: ret <16 x i16> zeroinitializer ; %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0) ret <16 x i16> %1 @@ -147,8 +143,7 @@ define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) { define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulhu_512( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) -; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; CHECK-NEXT: ret <32 x i16> zeroinitializer ; %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ret <32 x i16> %1 @@ -156,8 +151,7 @@ define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) { define <32 x i16> @one_pmulhu_512_commute(<32 x i16> %a0) { ; CHECK-LABEL: 
@one_pmulhu_512_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]]) -; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; CHECK-NEXT: ret <32 x i16> zeroinitializer ; %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0) ret <32 x i16> %1 |