diff options
author | Thurston Dang <thurston@google.com> | 2025-09-02 17:16:57 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-09-02 10:16:57 -0700 |
commit | 1cc84bcc08f723a6ba9d845c3fed1777547f45f9 (patch) | |
tree | ea76b64b454e49d2c5db268d898c8e59949ffd04 | |
parent | abb62b6ede461ac384d169fa9d9122328dc11809 (diff) | |
download | llvm-1cc84bcc08f723a6ba9d845c3fed1777547f45f9.zip llvm-1cc84bcc08f723a6ba9d845c3fed1777547f45f9.tar.gz llvm-1cc84bcc08f723a6ba9d845c3fed1777547f45f9.tar.bz2 |
[msan] Fix multiply-add-accumulate (#153927) to use ReductionFactor (#155748)
https://github.com/llvm/llvm-project/pull/153927 incorrectly cast using
a hardcoded reduction factor of two, rather than using the parameter.
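This caused false negatives but not false positives. (The only incorrect
case was a reduction factor of four: if four values {A,B,C,D} are being
reduced, the result is fully zero iff {A,B} and {C,D} are both zero
after pairwise reduction. If only one of those reduced pairs is zero,
then the quadwise reduction is non-zero; the old pairwise cast, however,
marked only the half of the output element that corresponds to the
non-zero pair as uninitialized, leaving the other half reported as
initialized.)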
9 files changed, 171 insertions, 279 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b1ffe4f..a50e8b3 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3910,8 +3910,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { FixedVectorType *ImplicitReturnType = ReturnType; // Step 1: instrument multiplication of corresponding vector elements if (EltSizeInBits) { - ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy( - EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits())); + ImplicitReturnType = cast<FixedVectorType>( + getMMXVectorTy(EltSizeInBits * ReductionFactor, + ParamType->getPrimitiveSizeInBits())); ParamType = cast<FixedVectorType>( getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits())); @@ -3959,7 +3960,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Step 2: instrument horizontal add // We don't need bit-precise horizontalReduce because we only want to check - // if each pair of elements is fully zero. + // if each pair/quad of elements is fully zero. // Cast to <4 x i32>. Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType); @@ -3969,7 +3970,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Constant::getNullValue(Horizontal->getType())), ImplicitReturnType); - // Cast it back to the required fake return type (<1 x i64>). + // Cast it back to the required fake return type (if MMX: <1 x i64>; for + // AVX, it is already correct). 
if (EltSizeInBits) OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I)); diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll index 298dc4b..802cffc 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll @@ -156,11 +156,9 @@ define <16 x i32> @test_mm512_dpbssd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i1> [[TMP17]], [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = or <64 x i1> [[TMP20]], [[TMP19]] ; CHECK-NEXT: [[TMP22:%.*]] = sext <64 x i1> [[TMP21]] to <64 x i8> -; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <32 x i16> -; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <32 x i16> [[TMP23]], zeroinitializer -; CHECK-NEXT: [[TMP25:%.*]] = sext <32 x i1> [[TMP24]] to <32 x i16> -; CHECK-NEXT: [[TMP26:%.*]] = bitcast <32 x i16> [[TMP25]] to i512 -; CHECK-NEXT: [[TMP27:%.*]] = bitcast i512 [[TMP26]] to <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <16 x i32> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP24]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP27]], [[TMP4]] ; CHECK-NEXT: [[RES:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -193,11 +191,9 @@ define <16 x i32> @test_mm512_mask_dpbssds_epi32(<16 x i32> %__W, i16 zeroext %_ ; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16> -; CHECK-NEXT: [[TMP20:%.*]] = icmp 
ne <32 x i16> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP1]] ; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> @@ -239,11 +235,9 @@ define <16 x i32> @test_mm512_maskz_dpbssd_epi32(i16 zeroext %__U, <16 x i32> %_ ; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16> -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <32 x i16> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP24]] ; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll 
b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll index e3a26ae..491a890 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll @@ -265,11 +265,9 @@ define <4 x i32> @test_mm_mask_dpbssd_epi32(<4 x i32> %__W, i4 zeroext %__U, <4 ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16> -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP1]] ; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1> @@ -311,11 +309,9 @@ define <4 x i32> @test_mm_maskz_dpbssds_epi32(i4 zeroext %__U, <4 x i32> %__W, < ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16> -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast 
i128 [[TMP22]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP24]] ; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1> @@ -357,11 +353,9 @@ define <8 x i32> @test_mm256_maskz_dpbssds_epi32(<8 x i32> %__W, i8 zeroext %__U ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16> -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP1]] ; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -403,11 +397,9 @@ define <8 x i32> @test_mm256_mask_dpbssd_epi32(i8 zeroext %__U, <8 x i32> %__W, ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 
x i16> -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP24]] ; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll index 822e546..331d434 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll @@ -34,11 +34,9 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> 
[[TMP19]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -82,11 +80,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -111,11 +107,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to 
i256 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -165,11 +159,9 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -213,11 +205,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8> -; CHECK-NEXT: 
[[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -244,11 +234,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP5:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -300,11 +288,9 @@ define <8 x 
i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -348,11 +334,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32 ; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32> ; CHECK-NEXT: 
[[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -377,11 +361,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32 ; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -431,11 +413,9 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 
[[TMP21]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -479,11 +459,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32 ; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -510,11 +488,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32 ; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x 
i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP5:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll index 38f4272..4647ee8 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll @@ -33,11 +33,9 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> ; CHECK-NEXT: 
[[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -81,11 +79,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -110,11 +106,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256 -; CHECK-NEXT: [[TMP57:%.*]] = 
bitcast i256 [[TMP56]] to <8 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -167,11 +161,9 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -215,11 +207,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] 
to <8 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -246,11 +236,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -307,11 +295,9 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, ; 
CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -355,11 +341,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32 ; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] 
= call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -384,11 +368,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32 ; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -441,11 +423,9 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] 
to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -489,11 +469,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32 ; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> @@ -520,11 +498,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32 ; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer -; 
CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll index f146823..83f83df 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll @@ -34,11 +34,9 @@ define <16 x i32>@test_int_x86_avx512_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: 
[[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -82,11 +80,9 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> @@ -111,11 +107,9 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32> +; CHECK-NEXT: 
[[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> @@ -165,11 +159,9 @@ define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> % ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -213,11 +205,9 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x ; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16> -; 
CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> @@ -242,11 +232,9 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x ; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll 
b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll index 7c39ff6..053c809 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll @@ -33,11 +33,9 @@ define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i3 ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -81,11 +79,9 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512 -; 
CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> @@ -110,11 +106,9 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i ; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> @@ -167,11 +161,9 @@ define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> % ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <64 x i1> [[TMP16]] to <64 x i8> -; CHECK-NEXT: 
[[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <32 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <32 x i16> [[TMP20]] to i512 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i512 [[TMP21]] to <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <64 x i8> [[TMP17]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -215,11 +207,9 @@ define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x ; CHECK-NEXT: [[TMP60:%.*]] = or <64 x i1> [[TMP38]], [[TMP58]] ; CHECK-NEXT: [[TMP61:%.*]] = or <64 x i1> [[TMP60]], [[TMP59]] ; CHECK-NEXT: [[TMP62:%.*]] = sext <64 x i1> [[TMP61]] to <64 x i8> -; CHECK-NEXT: [[TMP63:%.*]] = bitcast <64 x i8> [[TMP62]] to <32 x i16> -; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <32 x i16> [[TMP63]], zeroinitializer -; CHECK-NEXT: [[TMP65:%.*]] = sext <32 x i1> [[TMP64]] to <32 x i16> -; CHECK-NEXT: [[TMP66:%.*]] = bitcast <32 x i16> [[TMP65]] to i512 -; CHECK-NEXT: [[TMP29:%.*]] = bitcast i512 [[TMP66]] to <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP62]] to <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i32> [[TMP53]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP29]], [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> @@ -244,11 +234,9 @@ 
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x ; CHECK-NEXT: [[TMP50:%.*]] = or <64 x i1> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP51:%.*]] = or <64 x i1> [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = sext <64 x i1> [[TMP51]] to <64 x i8> -; CHECK-NEXT: [[TMP53:%.*]] = bitcast <64 x i8> [[TMP52]] to <32 x i16> -; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <32 x i16> [[TMP53]], zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = sext <32 x i1> [[TMP54]] to <32 x i16> -; CHECK-NEXT: [[TMP56:%.*]] = bitcast <32 x i16> [[TMP55]] to i512 -; CHECK-NEXT: [[TMP57:%.*]] = bitcast i512 [[TMP56]] to <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <64 x i8> [[TMP52]] to <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <16 x i32> [[TMP55]], zeroinitializer +; CHECK-NEXT: [[TMP57:%.*]] = sext <16 x i1> [[TMP56]] to <16 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i32> [[TMP57]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> [[X0]], <16 x i32> [[X1]], <16 x i32> [[X4]]) ; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll index 678faef..bc8c96e 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll @@ -33,11 +33,9 @@ define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = 
bitcast <16 x i16> [[TMP20]] to i256 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -70,11 +68,9 @@ define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -107,11 +103,9 @@ define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> 
[[TMP16]] to <32 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 @@ -144,11 +138,9 @@ define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 ; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16> -; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll index b36d09b..3df0f1d 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll @@ -45,11 +45,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssd_128(<4 x i32> %x0, <4 x i32> %x1, pt ; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i1> [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i1> [[TMP21]], [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP22]] to <16 x i8> -; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <8 x i16> -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i16> [[TMP24]], zeroinitializer -; CHECK-NEXT: [[TMP26:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i16> -; CHECK-NEXT: [[TMP27:%.*]] = bitcast <8 x i16> [[TMP26]] to i128 -; CHECK-NEXT: [[TMP28:%.*]] = bitcast i128 [[TMP27]] to <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <4 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = sext <4 x i1> [[TMP25]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP28]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8> @@ -66,11 +64,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssd_128(<4 x i32> %x0, <4 x i32> %x1, pt ; CHECK-NEXT: [[TMP42:%.*]] = or <16 x i1> [[TMP39]], [[TMP40]] ; CHECK-NEXT: [[TMP43:%.*]] = or <16 x i1> [[TMP42]], [[TMP41]] ; CHECK-NEXT: [[TMP44:%.*]] = sext <16 x i1> [[TMP43]] to <16 x i8> -; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <8 x i16> -; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i16> [[TMP45]], zeroinitializer -; CHECK-NEXT: [[TMP47:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i16> -; CHECK-NEXT: 
[[TMP48:%.*]] = bitcast <8 x i16> [[TMP47]] to i128 -; CHECK-NEXT: [[TMP49:%.*]] = bitcast i128 [[TMP48]] to <4 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <4 x i32> [[TMP45]], zeroinitializer +; CHECK-NEXT: [[TMP49:%.*]] = sext <4 x i1> [[TMP46]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP49]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]]) ; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP1]], [[_MSPROP3]] @@ -120,11 +116,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssds_128(<4 x i32> %x0, <4 x i32> %x1, p ; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i1> [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i1> [[TMP21]], [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP22]] to <16 x i8> -; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <8 x i16> -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i16> [[TMP24]], zeroinitializer -; CHECK-NEXT: [[TMP26:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i16> -; CHECK-NEXT: [[TMP27:%.*]] = bitcast <8 x i16> [[TMP26]] to i128 -; CHECK-NEXT: [[TMP28:%.*]] = bitcast i128 [[TMP27]] to <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i8> [[TMP23]] to <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <4 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = sext <4 x i1> [[TMP25]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP28]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[X1]] to <16 x i8> @@ -141,11 +135,9 @@ define <4 x i32>@test_int_x86_avx2_vpdpbssds_128(<4 x i32> %x0, <4 x i32> %x1, p ; CHECK-NEXT: [[TMP42:%.*]] = or <16 x i1> [[TMP39]], [[TMP40]] ; CHECK-NEXT: [[TMP43:%.*]] = or <16 x i1> [[TMP42]], [[TMP41]] ; CHECK-NEXT: [[TMP44:%.*]] = sext <16 x 
i1> [[TMP43]] to <16 x i8> -; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <8 x i16> -; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i16> [[TMP45]], zeroinitializer -; CHECK-NEXT: [[TMP47:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i16> -; CHECK-NEXT: [[TMP48:%.*]] = bitcast <8 x i16> [[TMP47]] to i128 -; CHECK-NEXT: [[TMP49:%.*]] = bitcast i128 [[TMP48]] to <4 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = bitcast <16 x i8> [[TMP44]] to <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <4 x i32> [[TMP45]], zeroinitializer +; CHECK-NEXT: [[TMP49:%.*]] = sext <4 x i1> [[TMP46]] to <4 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP49]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]]) ; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP1]], [[_MSPROP3]] @@ -195,11 +187,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssd_256(<8 x i32> %x0, <8 x i32> %x1, pt ; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i1> [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = sext <32 x i1> [[TMP22]] to <32 x i8> -; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <16 x i16> -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i16> [[TMP24]], zeroinitializer -; CHECK-NEXT: [[TMP26:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i16> -; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i16> [[TMP26]] to i256 -; CHECK-NEXT: [[TMP28:%.*]] = bitcast i256 [[TMP27]] to <8 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <8 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP28]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8> @@ 
-216,11 +206,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssd_256(<8 x i32> %x0, <8 x i32> %x1, pt ; CHECK-NEXT: [[TMP42:%.*]] = or <32 x i1> [[TMP39]], [[TMP40]] ; CHECK-NEXT: [[TMP43:%.*]] = or <32 x i1> [[TMP42]], [[TMP41]] ; CHECK-NEXT: [[TMP44:%.*]] = sext <32 x i1> [[TMP43]] to <32 x i8> -; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <16 x i16> -; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <16 x i16> [[TMP45]], zeroinitializer -; CHECK-NEXT: [[TMP47:%.*]] = sext <16 x i1> [[TMP46]] to <16 x i16> -; CHECK-NEXT: [[TMP48:%.*]] = bitcast <16 x i16> [[TMP47]] to i256 -; CHECK-NEXT: [[TMP49:%.*]] = bitcast i256 [[TMP48]] to <8 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <8 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i32> [[TMP45]], zeroinitializer +; CHECK-NEXT: [[TMP49:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP49]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]]) ; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i32> [[_MSPROP1]], [[_MSPROP3]] @@ -270,11 +258,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssds_256(<8 x i32> %x0, <8 x i32> %x1, p ; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i1> [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = sext <32 x i1> [[TMP22]] to <32 x i8> -; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <16 x i16> -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i16> [[TMP24]], zeroinitializer -; CHECK-NEXT: [[TMP26:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i16> -; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i16> [[TMP26]] to i256 -; CHECK-NEXT: [[TMP28:%.*]] = bitcast i256 [[TMP27]] to <8 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP23]] to <8 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = sext <8 x i1> [[TMP25]] to <8 x 
i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP28]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x i32> [[X1]] to <32 x i8> @@ -291,11 +277,9 @@ define <8 x i32>@test_int_x86_avx2_vpdpbssds_256(<8 x i32> %x0, <8 x i32> %x1, p ; CHECK-NEXT: [[TMP42:%.*]] = or <32 x i1> [[TMP39]], [[TMP40]] ; CHECK-NEXT: [[TMP43:%.*]] = or <32 x i1> [[TMP42]], [[TMP41]] ; CHECK-NEXT: [[TMP44:%.*]] = sext <32 x i1> [[TMP43]] to <32 x i8> -; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <16 x i16> -; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <16 x i16> [[TMP45]], zeroinitializer -; CHECK-NEXT: [[TMP47:%.*]] = sext <16 x i1> [[TMP46]] to <16 x i16> -; CHECK-NEXT: [[TMP48:%.*]] = bitcast <16 x i16> [[TMP47]] to i256 -; CHECK-NEXT: [[TMP49:%.*]] = bitcast i256 [[TMP48]] to <8 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = bitcast <32 x i8> [[TMP44]] to <8 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i32> [[TMP45]], zeroinitializer +; CHECK-NEXT: [[TMP49:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i32> ; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP49]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]]) ; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i32> [[_MSPROP1]], [[_MSPROP3]] |