diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-07-22 14:24:55 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-07-22 14:31:11 +0100 |
commit | 05671cec6f736c65b7df29234db828116e6d95dd (patch) | |
tree | 4d87536b8d443f72a836055f317bf354471c422d | |
parent | 65e86a8f3fb44dc09dc2e08526d69e3a57f63995 (diff) | |
download | llvm-05671cec6f736c65b7df29234db828116e6d95dd.zip llvm-05671cec6f736c65b7df29234db828116e6d95dd.tar.gz llvm-05671cec6f736c65b7df29234db828116e6d95dd.tar.bz2 |
[X86] Add tests showing failure to push freeze through SSE PMADD nodes
PMADD nodes guarantee in-bounds/saturated extend-multiply-add results.
Test to help with regression identified on #84924
-rw-r--r-- | llvm/test/CodeGen/X86/combine-pmadd.ll | 62 |
1 file changed, 62 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/combine-pmadd.ll b/llvm/test/CodeGen/X86/combine-pmadd.ll
index 565d9ef..8ba5a25 100644
--- a/llvm/test/CodeGen/X86/combine-pmadd.ll
+++ b/llvm/test/CodeGen/X86/combine-pmadd.ll
@@ -63,6 +63,37 @@ define <8 x i32> @combine_pmaddwd_concat(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>
 ret <8 x i32> %3
 }
 
+define <8 x i32> @combine_pmaddwd_concat_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: combine_pmaddwd_concat_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pmovsxbw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; SSE-NEXT: pmaddwd %xmm2, %xmm0
+; SSE-NEXT: pmaddwd %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_pmaddwd_concat_freeze:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pmaddwd_concat_freeze:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %lo = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %hi = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %flo = freeze <4 x i32> %lo
+ %fhi = freeze <4 x i32> %hi
+ %res = shufflevector <4 x i32> %flo, <4 x i32> %fhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %res
+}
+
 define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
 ; SSE-LABEL: combine_pmaddwd_demandedelts:
 ; SSE: # %bb.0:
@@ -178,6 +209,37 @@ define <16 x i16> @combine_pmaddubsw_concat(<16 x i8> %a0, <16 x i8> %a1, <16 x
 ret <16 x i16> %3
 }
 
+define <16 x i16> @combine_pmaddubsw_concat_freeze(<16 x i8> %a0, <16 x i8> %a1) {
+; SSE-LABEL: combine_pmaddubsw_concat_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE-NEXT: pmaddubsw %xmm2, %xmm0
+; SSE-NEXT: pmaddubsw %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_pmaddubsw_concat_freeze:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pmaddubsw_concat_freeze:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %lo = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %hi = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %flo = freeze <8 x i16> %lo
+ %fhi = freeze <8 x i16> %hi
+ %res = shufflevector <8 x i16> %flo, <8 x i16> %fhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %res
+}
+
 define <8 x i16> @combine_pmaddubsw_demandedelts(<16 x i8> %a0, <16 x i8> %a1) {
 ; SSE-LABEL: combine_pmaddubsw_demandedelts:
 ; SSE: # %bb.0: