diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-07-22 14:30:46 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-07-22 14:31:11 +0100 |
commit | e9974f02a30280eab868cdab0a90a5372b1799cd (patch) | |
tree | 437ab11918ecf3d1052c887c44cbcf4fc2fbc4f2 | |
parent | 05671cec6f736c65b7df29234db828116e6d95dd (diff) | |
download | llvm-e9974f02a30280eab868cdab0a90a5372b1799cd.zip llvm-e9974f02a30280eab868cdab0a90a5372b1799cd.tar.gz llvm-e9974f02a30280eab868cdab0a90a5372b1799cd.tar.bz2 |
[X86] canCreateUndefOrPoisonForTargetNode - PMADDWD/PMADDUBSW nodes don't create poison
Help with regression identified on #84924
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-pmadd.ll | 10 |
2 files changed, 9 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f06b962..5a9d679 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43315,6 +43315,9 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
     bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
   switch (Op.getOpcode()) {
+  // SSE vector multiplies are either inbounds or saturate.
+  case X86ISD::VPMADDUBSW:
+  case X86ISD::VPMADDWD:
   // SSE vector shifts handle out of bounds shift amounts.
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
diff --git a/llvm/test/CodeGen/X86/combine-pmadd.ll b/llvm/test/CodeGen/X86/combine-pmadd.ll
index 8ba5a25..f184970 100644
--- a/llvm/test/CodeGen/X86/combine-pmadd.ll
+++ b/llvm/test/CodeGen/X86/combine-pmadd.ll
@@ -81,10 +81,11 @@ define <8 x i32> @combine_pmaddwd_concat_freeze(<8 x i16> %a0, <8 x i16> %a1) {
 ;
 ; AVX2-LABEL: combine_pmaddwd_concat_freeze:
 ; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm1
+; AVX2-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
   %lo = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   %hi = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -227,10 +228,11 @@ define <16 x i16> @combine_pmaddubsw_concat_freeze(<16 x i8> %a0, <16 x i8> %a1)
 ;
 ; AVX2-LABEL: combine_pmaddubsw_concat_freeze:
 ; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm1
+; AVX2-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
   %lo = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
   %hi = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
```