aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2024-07-22 14:24:55 +0100
committerSimon Pilgrim <llvm-dev@redking.me.uk>2024-07-22 14:31:11 +0100
commit05671cec6f736c65b7df29234db828116e6d95dd (patch)
tree4d87536b8d443f72a836055f317bf354471c422d
parent65e86a8f3fb44dc09dc2e08526d69e3a57f63995 (diff)
downloadllvm-05671cec6f736c65b7df29234db828116e6d95dd.zip
llvm-05671cec6f736c65b7df29234db828116e6d95dd.tar.gz
llvm-05671cec6f736c65b7df29234db828116e6d95dd.tar.bz2
[X86] Add tests showing failure to push freeze through SSE PMADD nodes
PMADD nodes guarantee inbounds/saturated ext-multiply-add results. Test to help with the regression identified on #84924.
-rw-r--r--llvm/test/CodeGen/X86/combine-pmadd.ll62
1 files changed, 62 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/combine-pmadd.ll b/llvm/test/CodeGen/X86/combine-pmadd.ll
index 565d9ef..8ba5a25 100644
--- a/llvm/test/CodeGen/X86/combine-pmadd.ll
+++ b/llvm/test/CodeGen/X86/combine-pmadd.ll
@@ -63,6 +63,37 @@ define <8 x i32> @combine_pmaddwd_concat(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>
ret <8 x i32> %3
}
+define <8 x i32> @combine_pmaddwd_concat_freeze(<8 x i16> %a0, <8 x i16> %a1) { ; Concatenates two individually frozen <4 x i32> pmadd.wd results into one <8 x i32> value.
+; SSE-LABEL: combine_pmaddwd_concat_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pmovsxbw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; SSE-NEXT: pmaddwd %xmm2, %xmm0
+; SSE-NEXT: pmaddwd %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_pmaddwd_concat_freeze:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pmaddwd_concat_freeze:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmaddwd %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %lo = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; low half: %a0 multiply-added against an all-ones i16 splat
+ %hi = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; high half: %a1 against the same all-ones splat
+ %flo = freeze <4 x i32> %lo ; freeze sits between the pmadd.wd call and the concat below
+ %fhi = freeze <4 x i32> %hi ; likewise for the high half
+ %res = shufflevector <4 x i32> %flo, <4 x i32> %fhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; identity mask 0..7, i.e. %flo lanes followed by %fhi lanes
+ ret <8 x i32> %res
+}
+
define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: combine_pmaddwd_demandedelts:
; SSE: # %bb.0:
@@ -178,6 +209,37 @@ define <16 x i16> @combine_pmaddubsw_concat(<16 x i8> %a0, <16 x i8> %a1, <16 x
ret <16 x i16> %3
}
+define <16 x i16> @combine_pmaddubsw_concat_freeze(<16 x i8> %a0, <16 x i8> %a1) { ; Concatenates two individually frozen <8 x i16> pmadd.ub.sw results into one <16 x i16> value.
+; SSE-LABEL: combine_pmaddubsw_concat_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE-NEXT: pmaddubsw %xmm2, %xmm0
+; SSE-NEXT: pmaddubsw %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: combine_pmaddubsw_concat_freeze:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pmaddubsw_concat_freeze:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX2-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %lo = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) ; low half: %a0 multiply-added against an all-ones i8 splat
+ %hi = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) ; high half: %a1 against the same all-ones splat
+ %flo = freeze <8 x i16> %lo ; freeze sits between the pmadd.ub.sw call and the concat below
+ %fhi = freeze <8 x i16> %hi ; likewise for the high half
+ %res = shufflevector <8 x i16> %flo, <8 x i16> %fhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; identity mask 0..15, i.e. %flo lanes followed by %fhi lanes
+ ret <16 x i16> %res
+}
+
define <8 x i16> @combine_pmaddubsw_demandedelts(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_pmaddubsw_demandedelts:
; SSE: # %bb.0: