diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-10-14 14:53:55 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-10-14 14:54:03 +0100 |
commit | d81c2f16a3c0ee951147d150f32068eee959b885 (patch) | |
tree | fb9d43d11a4e825dbc9ea0bfec6b787dab5039bb | |
parent | fd8a4b007330c214fc9cc6e2c255cc18fc3c6b0c (diff) | |
download | llvm-d81c2f16a3c0ee951147d150f32068eee959b885.zip llvm-d81c2f16a3c0ee951147d150f32068eee959b885.tar.gz llvm-d81c2f16a3c0ee951147d150f32068eee959b885.tar.bz2 |
[X86] canCreateUndefOrPoisonForTargetNode - X86ISD::VPERMV3 shuffles don't create undef/poison
The operands might contain undef/poison elements, but the shuffle node will not create any itself.
Improves test case from #109272
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll | 3 |
2 files changed, 4 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e1ce5bb..e70fbd8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -43839,7 +43839,8 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( switch (Op.getOpcode()) { case X86ISD::PSHUFD: - case X86ISD::VPERMILPI: { + case X86ISD::VPERMILPI: + case X86ISD::VPERMV3: { SmallVector<int, 8> Mask; SmallVector<SDValue, 2> Ops; if (getTargetShuffleMask(Op, true, Ops, Mask)) { @@ -43883,6 +43884,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode( return false; case X86ISD::PSHUFD: case X86ISD::VPERMILPI: + case X86ISD::VPERMV3: case X86ISD::UNPCKH: case X86ISD::UNPCKL: return false; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll index 32cfb90..04cfa3c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll @@ -151,10 +151,9 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64 define <64 x i8> @combine_vpermi2var_v64i8_with_mask(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) { ; CHECK-LABEL: combine_vpermi2var_v64i8_with_mask: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: vpmovb2m %zmm1, %k0 ; CHECK-NEXT: knotq %k0, %k1 -; CHECK-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: ret{{[l|q]}} %perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) %cmp = icmp slt <64 x i8> %a1, zeroinitializer |