| author | Thurston Dang <thurston@google.com> | 2025-03-08 19:08:08 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-03-08 19:08:08 -0800 |
| commit | 3a0c33afd1b3085d958e89c1c0c8a8b8c0f34739 (patch) | |
| tree | 012ee27b1dd7294f222a7f4beb94f597bd63e64d | |
| parent | 0715c6cc6d11814b8a31bc0368ca080b72810177 (diff) | |
[msan] Handle Arm NEON pairwise min/max instructions (#129824)
Change the handling of:
- llvm.aarch64.neon.fmaxp
- llvm.aarch64.neon.fminp
- llvm.aarch64.neon.fmaxnmp
- llvm.aarch64.neon.fminnmp
- llvm.aarch64.neon.smaxp
- llvm.aarch64.neon.sminp
- llvm.aarch64.neon.umaxp
- llvm.aarch64.neon.uminp
from the incorrect heuristic handler (maybeHandleSimpleNomemIntrinsic)
to handlePairwiseShadowOrIntrinsic.
Update the tests from https://github.com/llvm/llvm-project/pull/129760, and add a note that
maybeHandleSimpleNomemIntrinsic may incorrectly match horizontal/pairwise intrinsics.
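
For illustration, the sketch below shows the shadow computation that the pairwise handler produces for a two-operand pairwise intrinsic, mirroring the updated CHECK lines in the test diff: the two operand shadows are deinterleaved into even- and odd-indexed lanes and then OR'd, so each result lane's shadow depends only on the pair of input lanes that produced it. The function name and the %s0/%s1 shadow values are illustrative placeholders, not part of the actual instrumentation.

```llvm
; Minimal sketch of pairwise shadow propagation for an <8 x i8> intrinsic such
; as llvm.aarch64.neon.smaxp.v8i8. %s0 and %s1 stand for the shadows of the two
; operands (hypothetical names); the return value is the shadow of the result.
define <8 x i8> @pairwise_shadow_sketch(<8 x i8> %s0, <8 x i8> %s1) {
  ; Gather the even-indexed lanes of the concatenated operand shadows ...
  %evens = shufflevector <8 x i8> %s0, <8 x i8> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; ... and the odd-indexed lanes.
  %odds = shufflevector <8 x i8> %s0, <8 x i8> %s1, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; Each output lane is uninitialized iff either element of its input pair is.
  %shadow = or <8 x i8> %evens, %odds
  ret <8 x i8> %shadow
}
```

By contrast, the removed CHECK lines show the old heuristic simply computing `or <8 x i8> %s0, %s1`, which combines shadow lane-by-lane and therefore does not match the pairwise lane grouping of these instructions.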
| -rw-r--r-- | llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 15 |
| -rw-r--r-- | llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll | 150 |

2 files changed, 125 insertions, 40 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ba925275..cebcfde 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3129,6 +3129,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// fine). /// /// Caller guarantees that this intrinsic does not access memory. + /// + /// TODO: "horizontal"/"pairwise" intrinsics are often incorrectly matched by + /// by this handler. [[maybe_unused]] bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I, unsigned int trailingFlags) { @@ -4871,6 +4874,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOrigin(&I, getCleanOrigin()); break; + // TODO: handling max/min similarly to AND/OR may be more precise + // Floating-Point Maximum/Minimum Pairwise + case Intrinsic::aarch64_neon_fmaxp: + case Intrinsic::aarch64_neon_fminp: + // Floating-Point Maximum/Minimum Number Pairwise + case Intrinsic::aarch64_neon_fmaxnmp: + case Intrinsic::aarch64_neon_fminnmp: + // Signed/Unsigned Maximum/Minimum Pairwise + case Intrinsic::aarch64_neon_smaxp: + case Intrinsic::aarch64_neon_sminp: + case Intrinsic::aarch64_neon_umaxp: + case Intrinsic::aarch64_neon_uminp: // Add Pairwise case Intrinsic::aarch64_neon_addp: // Floating-point Add Pairwise diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll index a46a5b1..e2457c0 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vmax.ll @@ -3,7 +3,8 @@ ; ; Forked from llvm/test/CodeGen/AArch64/arm64-vmax.ll ; -; Pairwise instructions which are handled incorrectly by heuristics: +; Pairwise instructions which are explicitly handled by +; handlePairwiseShadowOrIntrinsic: ; - llvm.aarch64.neon.fmaxp (floating-point maximum pairwise) ; - llvm.aarch64.neon.fminp ; - llvm.aarch64.neon.fmaxnmp (floating-point maximum number pairwise) @@ -1017,7 +1018,9 @@ define <8 x i8> @smaxp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1056,7 +1059,9 @@ define <16 x i8> @smaxp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, 
i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1095,7 +1100,9 @@ define <4 x i16> @smaxp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1134,7 +1141,9 @@ define <8 x i16> @smaxp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1173,7 +1182,9 @@ define <2 x i32> @smaxp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1212,7 +1223,9 @@ define <4 x i32> @smaxp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] 
to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR3]] @@ -1258,7 +1271,9 @@ define <8 x i8> @umaxp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1297,7 +1312,9 @@ define <16 x i8> @umaxp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1336,7 +1353,9 @@ define <4 x i16> @umaxp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: 
store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1375,7 +1394,9 @@ define <8 x i16> @umaxp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1414,7 +1435,9 @@ define <2 x i32> @umaxp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1453,7 +1476,9 @@ define <4 x i32> @umaxp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR3]] @@ -1500,7 +1525,9 @@ define <8 x i8> @sminp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], 
[[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1539,7 +1566,9 @@ define <16 x i8> @sminp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1578,7 +1607,9 @@ define <4 x i16> @sminp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1617,7 +1648,9 @@ define <8 x i16> @sminp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1656,7 +1689,9 @@ define <2 x i32> @sminp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; 
CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1695,7 +1730,9 @@ define <4 x i32> @sminp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR3]] @@ -1741,7 +1778,9 @@ define <8 x i8> @uminp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMPVAR3]] @@ -1780,7 +1819,9 @@ define <16 x i8> @uminp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMPVAR3]] @@ -1819,7 +1860,9 @@ define <4 x i16> @uminp_4h(ptr %A, ptr %B) nounwind #0 
{ ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR3]] @@ -1858,7 +1901,9 @@ define <8 x i16> @uminp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR3]] @@ -1897,7 +1942,9 @@ define <2 x i32> @uminp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR3]] @@ -1936,7 +1983,9 @@ define <4 x i32> @uminp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> 
[[TMPVAR3]] @@ -2103,7 +2152,9 @@ define <2 x float> @fmaxp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x float> [[TMPVAR3]] @@ -2142,7 +2193,9 @@ define <4 x float> @fmaxp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2181,7 +2234,9 @@ define <2 x double> @fmaxp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2345,7 +2400,9 @@ define <2 x float> @fminp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: 
ret <2 x float> [[TMPVAR3]] @@ -2384,7 +2441,9 @@ define <4 x float> @fminp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2423,7 +2482,9 @@ define <2 x double> @fminp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2466,7 +2527,9 @@ define <2 x float> @fminnmp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x float> [[TMPVAR3]] @@ -2505,7 +2568,9 @@ define <4 x float> @fminnmp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> 
[[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2544,7 +2609,9 @@ define <2 x double> @fminnmp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2587,7 +2654,9 @@ define <2 x float> @fmaxnmp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x float> [[TMPVAR3]] @@ -2626,7 +2695,9 @@ define <4 x float> @fmaxnmp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMPVAR3]] @@ -2665,7 +2736,9 @@ define <2 x double> @fmaxnmp_2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 1, i32 3> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> 
[[TMPVAR2]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMPVAR3]] @@ -2681,6 +2754,3 @@ declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) n declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone attributes #0 = { sanitize_memory } -;. -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} -;. |
