diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-06-28 13:12:47 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-06-28 13:26:43 +0100 |
commit | f447597b4511c3f8e657c597aa490a844c3e7c22 (patch) | |
tree | 632aeed558a87d52db4f0a025de774757dbc97ce | |
parent | e5094baf96d593e304d8240e72475f0d5edeb32e (diff) | |
download | llvm-f447597b4511c3f8e657c597aa490a844c3e7c22.zip llvm-f447597b4511c3f8e657c597aa490a844c3e7c22.tar.gz llvm-f447597b4511c3f8e657c597aa490a844c3e7c22.tar.bz2 |
[X86] Convert PMADDWD constant folding tests to whole vector tests to check all the results
-rw-r--r-- | llvm/test/CodeGen/X86/combine-pmadd.ll | 18 |
1 file changed, 6 insertions, 12 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/combine-pmadd.ll b/llvm/test/CodeGen/X86/combine-pmadd.ll
index faa20db..c4c3e28 100644
--- a/llvm/test/CodeGen/X86/combine-pmadd.ll
+++ b/llvm/test/CodeGen/X86/combine-pmadd.ll
@@ -88,53 +88,47 @@ define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
  ret <4 x i32> %4
 }
 
-; TODO
-define i32 @combine_pmaddwd_constant() {
+; TODO: [2] = (-5*13)+(6*-15) = -155 = 4294967141
+define <4 x i32> @combine_pmaddwd_constant() {
 ; SSE-LABEL: combine_pmaddwd_constant:
 ; SSE: # %bb.0:
 ; SSE-NEXT: pmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
 ; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
-; SSE-NEXT: pextrd $2, %xmm0, %eax
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_pmaddwd_constant:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
 ; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
 ; AVX-NEXT: retq
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -9, i16 -11, i16 13, i16 -15, i16 17, i16 -19>)
- %2 = extractelement <4 x i32> %1, i32 2 ; (-5*13)+(6*-15) = -155
- ret i32 %2
+ ret <4 x i32> %1
 }
 
 ; ensure we don't assume pmaddwd performs add nsw
-define i32 @combine_pmaddwd_constant_nsw() {
+; TODO: (-32768*-32768)+(-32768*-32768) = 0x80000000 = 2147483648
+define <4 x i32> @combine_pmaddwd_constant_nsw() {
 ; SSE-LABEL: combine_pmaddwd_constant_nsw:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
 ; SSE-NEXT: pmaddwd %xmm0, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: combine_pmaddwd_constant_nsw:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
 ; AVX1-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, %eax
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: combine_pmaddwd_constant_nsw:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
 ; AVX2-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovd %xmm0, %eax
 ; AVX2-NEXT: retq
  %1 = insertelement <8 x i16> undef, i16 32768, i32 0
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
  %3 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %2)
- %4 = extractelement <4 x i32> %3, i32 0 ; (-32768*-32768)+(-32768*-32768) = 0x80000000
- ret i32 %4
+ ret <4 x i32> %3
 }
 
 define <8 x i16> @combine_pmaddubsw_zero(<16 x i8> %a0, <16 x i8> %a1) {
```