From 90e450be03aa2efb5aa2d528c53f74108ec2e9c6 Mon Sep 17 00:00:00 2001
From: Vitaly Buka
Date: Wed, 12 Jun 2024 09:24:31 -0700
Subject: =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20changes=20to?=
 =?UTF-8?q?=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 .../Transforms/Instrumentation/MemorySanitizer.cpp | 117 ++++++++++++++++++++-
 .../MemorySanitizer/X86/avx-intrinsics-x86.ll      |  67 ++++++++----
 .../MemorySanitizer/X86/avx2-intrinsics-x86.ll     |  31 +++---
 .../MemorySanitizer/X86/sse41-intrinsics-x86.ll    | 101 ++++++++++--------
 4 files changed, 239 insertions(+), 77 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 499f799..f11c1c5 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3289,6 +3289,106 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  // Convert `Mask` into `<n x i1>`.
+  Constant *createDppMask(unsigned Width, unsigned Mask) {
+    SmallVector<Constant *> R(Width);
+    for (auto &M : R) {
+      M = ConstantInt::getBool(F.getContext(), Mask & 1);
+      Mask >>= 1;
+    }
+    return ConstantVector::get(R);
+  }
+
+  // Calculate output shadow as array of booleans `<n x i1>`, assuming if any
+  // arg is poisoned, entire dot product is poisoned.
+  Value *findDppPoisonedOutput(IRBuilder<> &IRB, Value *S, unsigned SrcMask,
+                               unsigned DstMask) {
+    const unsigned Width =
+        cast<FixedVectorType>(S->getType())->getNumElements();
+
+    S = IRB.CreateSelect(createDppMask(Width, SrcMask), S,
+                         Constant::getNullValue(S->getType()));
+    Value *SElem = IRB.CreateOrReduce(S);
+    Value *IsClean = IRB.CreateIsNull(SElem, "_msdpp");
+    Value *DstMaskV = createDppMask(Width, DstMask);
+
+    return IRB.CreateSelect(
+        IsClean, Constant::getNullValue(DstMaskV->getType()), DstMaskV);
+  }
+
+  // See `Intel Intrinsics Guide` for `_dp_p*` instructions.
+  //
+  // The 2 and 4 element versions produce a single scalar dot product, and then
+  // put it into the elements of the output vector selected by the 4 lowest
+  // bits of the mask. The top 4 bits of the mask control which elements of the
+  // inputs to use for the dot product.
+  //
+  // The 8 element version's mask still has only 4 bits for the input and 4
+  // bits for the output mask. According to the spec it simply operates as the
+  // 4 element version on the first 4 elements of inputs and output, and then
+  // on the last 4 elements of inputs and output.
+  void handleDppIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *S0 = getShadow(&I, 0);
+    Value *S1 = getShadow(&I, 1);
+    Value *S = IRB.CreateOr(S0, S1);
+
+    const unsigned Width =
+        cast<FixedVectorType>(S->getType())->getNumElements();
+    assert(Width == 2 || Width == 4 || Width == 8);
+
+    const unsigned Mask = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+    const unsigned SrcMask = Mask >> 4;
+    const unsigned DstMask = Mask & 0xf;
+
+    // Calculate shadow as `<n x i1>`.
+    Value *SI1 = findDppPoisonedOutput(IRB, S, SrcMask, DstMask);
+    if (Width == 8) {
+      // First 4 elements of shadow are already calculated.
+      // `findDppPoisonedOutput` operates on 32 bit masks, so we can just shift
+      // the masks and repeat.
+      SI1 = IRB.CreateOr(
+          SI1, findDppPoisonedOutput(IRB, S, SrcMask << 4, DstMask << 4));
+    }
+    // Extend to real size of shadow, poisoning either all or none bits of an
+    // element.
+    S = IRB.CreateSExt(SI1, S->getType(), "_msdpp");
+
+    setShadow(&I, S);
+    setOriginForNaryOp(I);
+  }
+
+  Value *convertBlendvToSelectMask(IRBuilder<> &IRB, Value *C) {
+    C = CreateAppToShadowCast(IRB, C);
+    FixedVectorType *FVT = cast<FixedVectorType>(C->getType());
+    unsigned ElSize = FVT->getElementType()->getPrimitiveSizeInBits();
+    C = IRB.CreateAShr(C, ElSize - 1);
+    FVT = FixedVectorType::get(IRB.getInt1Ty(), FVT->getNumElements());
+    return IRB.CreateTrunc(C, FVT);
+  }
+
+  // `blendv(f, t, c)` is effectively `select(c[top_bit], t, f)`.
+  void handleBlendvIntrinsic(IntrinsicInst &I) {
+    Value *C = I.getOperand(2);
+    Value *T = I.getOperand(1);
+    Value *F = I.getOperand(0);
+
+    Value *Sc = getShadow(&I, 2);
+    Value *Oc = MS.TrackOrigins ? getOrigin(C) : nullptr;
+
+    {
+      IRBuilder<> IRB(&I);
+      // Extract top bit from condition and its shadow.
+      C = convertBlendvToSelectMask(IRB, C);
+      Sc = convertBlendvToSelectMask(IRB, Sc);
+
+      setShadow(C, Sc);
+      setOrigin(C, Oc);
+    }
+
+    handleSelectLikeInst(I, C, T, F);
+  }
+
   // Instrument sum-of-absolute-differences intrinsic.
   void handleVectorSadIntrinsic(IntrinsicInst &I) {
     const unsigned SignificantBitsPerResultElement = 16;
@@ -3644,7 +3744,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
+  static SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
     SmallVector<int, 8> Mask;
     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
       Mask.append(2, X);
     }
@@ -3960,6 +4060,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorPackIntrinsic(I);
       break;
 
+    case Intrinsic::x86_sse41_pblendvb:
+    case Intrinsic::x86_sse41_blendvpd:
+    case Intrinsic::x86_sse41_blendvps:
+    case Intrinsic::x86_avx_blendv_pd_256:
+    case Intrinsic::x86_avx_blendv_ps_256:
+    case Intrinsic::x86_avx2_pblendvb:
+      handleBlendvIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_avx_dp_ps_256:
+    case Intrinsic::x86_sse41_dppd:
+    case Intrinsic::x86_sse41_dpps:
+      handleDppIntrinsic(I);
+      break;
+
     case Intrinsic::x86_mmx_packsswb:
     case Intrinsic::x86_mmx_packuswb:
       handleVectorPackIntrinsic(I, 16);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index bc61970..3d5e400 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -38,14 +38,24 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi
 
 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
 ; CHECK-LABEL: @test_x86_avx_blendv_pd_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>
[[A0:%.*]], <4 x double> [[A1:%.*]], <4 x double> [[A2:%.*]]) -; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i64> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP12]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i64> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i64> [[TMP14]], <4 x i64> [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> [[A0]], <4 x double> [[A1]], <4 x double> [[A2]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[RES]] ; %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1] @@ -56,14 +66,24 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { ; CHECK-LABEL: @test_x86_avx_blendv_ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], <8 x float> [[A2:%.*]]) -; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i32> [[TMP12]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i32> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[TMP14]], <8 x i32> [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> 
@llvm.x86.avx.blendv.ps.256(<8 x float> [[A0]], <8 x float> [[A1]], <8 x float> [[A2]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES]] ; %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] @@ -389,18 +409,19 @@ define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> , <8 x i32> [[TMP3]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP4]]) +; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <8 x i1> zeroinitializer, <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> , <8 x i32> [[TMP3]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP7]]) +; CHECK-NEXT: [[_MSDPP1:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[_MSDPP1]], <8 x i1> zeroinitializer, <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[_MSDPP2:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 -18) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[_MSDPP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES]] ; %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 -18) ; <<8 x float>> [#uses=1] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index 2759ca7..5efb7eb 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -816,8 +816,8 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable 
@@ -828,10 +828,10 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[_MSLD]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP8]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] @@ -881,14 +881,21 @@ define <16 x i16> @test_x86_avx2_packusdw_fold() #0 { define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) #0 { ; CHECK-LABEL: @test_x86_avx2_pblendvb( -; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i8> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], <32 x i8> [[A2:%.*]]) -; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i8> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> [[TMP11]], <32 x i8> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0]], <32 x i8> [[A1]], <32 x i8> [[A2]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll index 36f8375..a714558 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll @@ -6,14 +6,24 @@ target triple = "x86_64-unknown-linux-gnu" define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x 
double> %a1, <2 x double> %a2) #0 { ; CHECK-LABEL: @test_x86_sse41_blendvpd( -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]]) -; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i64> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i64> [[TMP12]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <2 x i1> [[TMP8]], <2 x i64> [[TMP14]], <2 x i64> [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[RES]] ; %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] @@ -24,14 +34,24 @@ declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x d define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; CHECK-LABEL: @test_x86_sse41_blendvps( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]]) -; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x 
float> [[A2:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i32> [[TMP12]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP14]], <4 x i32> [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] @@ -45,18 +65,14 @@ define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> , <2 x i64> [[TMP3]], <2 x i64> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP4]]) +; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <2 x i1> zeroinitializer, <2 x i1> +; CHECK-NEXT: [[_MSDPP1:%.*]] = sext <2 x i1> [[TMP6]] to <2 x i64> ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 -18) -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <2 x i64> [[_MSDPP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[RES]] ; %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 -18) ; <<2 x double>> [#uses=1] @@ -70,18 +86,14 @@ define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 
[[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i32> [[TMP3]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <4 x i1> zeroinitializer, <4 x i1> +; CHECK-NEXT: [[_MSDPP1:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 -18) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[_MSDPP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 -18) ; <<4 x float>> [#uses=1] @@ -100,7 +112,7 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -210,14 +222,21 @@ define <8 x i16> @test_x86_sse41_packusdw_fold() #0 { define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) #0 { ; CHECK-LABEL: @test_x86_sse41_pblendvb( -; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], <16 x i8> [[A2:%.*]]) -; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i8> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call 
<16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0]], <16 x i8> [[A1]], <16 x i8> [[A2]])
+; CHECK-NEXT:    store <16 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[RES]]
 ;
   %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
-- 
cgit v1.1
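
Note (not part of the patch): the following standalone C++ sketch models the shadow rule that handleDppIntrinsic encodes, on plain bool arrays instead of IR. The name dppShadow and the array-based modelling are illustrative assumptions; only the mask decoding (low 4 immediate bits select output lanes, high 4 bits select input lanes, the 8-lane form behaving as two independent 4-lane groups) mirrors the handler above.

#include <array>
#include <cstddef>
#include <cstdio>

// Per 4-lane group: if any input lane selected by the source mask is poisoned,
// every output lane selected by the destination mask becomes poisoned.
template <std::size_t N>
std::array<bool, N> dppShadow(const std::array<bool, N> &SA,
                              const std::array<bool, N> &SB, unsigned Imm) {
  const unsigned SrcMask = Imm >> 4;
  const unsigned DstMask = Imm & 0xf;
  std::array<bool, N> Out{};
  for (std::size_t Group = 0; Group < N; Group += 4) {
    bool AnyPoisoned = false;
    for (std::size_t I = 0; I < 4 && Group + I < N; ++I)
      if ((SrcMask >> I) & 1)
        AnyPoisoned = AnyPoisoned || SA[Group + I] || SB[Group + I];
    for (std::size_t I = 0; I < 4 && Group + I < N; ++I)
      if ((DstMask >> I) & 1)
        Out[Group + I] = AnyPoisoned;
  }
  return Out;
}

int main() {
  // 4-lane dpps-style example with imm8 = 0xEE (the -18 used by the tests):
  // lanes 1..3 feed the dot product, lanes 1..3 of the result receive it.
  std::array<bool, 4> SA{false, true, false, false}; // lane 1 of %a0 poisoned
  std::array<bool, 4> SB{};                          // %a1 fully initialized
  for (bool B : dppShadow(SA, SB, 0xEE))
    std::printf("%d ", B); // prints "0 1 1 1"
  std::printf("\n");
  return 0;
}

Running it prints "0 1 1 1": all destination lanes named by the mask become poisoned together, which is the all-or-nothing per-group behaviour the new _msdpp shadow encodes.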
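Note (not part of the patch): a minimal per-element sketch of the blendv shadow rule, assuming a 64-bit element for concreteness. Lane and blendvShadow are hypothetical names; the poisoned-condition branch reproduces the xor/or pattern visible in the updated CHECK lines, which handleBlendvIntrinsic obtains by lowering blendv to a select and reusing handleSelectLikeInst.

#include <cstdint>

// A scalar stand-in for one vector element: its concrete bits and its MSan
// shadow (a set shadow bit means "this bit is uninitialized").
struct Lane {
  uint64_t Val;
  uint64_t Shadow;
};

// blendv picks T when the top (sign) bit of the condition element C is set,
// otherwise F.
Lane blendvShadow(Lane F, Lane T, Lane C) {
  const bool CondBit = (C.Val >> 63) & 1;         // value of the deciding bit
  const bool CondPoisoned = (C.Shadow >> 63) & 1; // is that bit itself poisoned?

  Lane R;
  R.Val = CondBit ? T.Val : F.Val;
  if (!CondPoisoned) {
    // Clean condition bit: propagate the shadow of whichever operand was chosen.
    R.Shadow = CondBit ? T.Shadow : F.Shadow;
  } else {
    // Poisoned condition bit: a result bit is defined only if both choices
    // agree on it and neither choice is poisoned there (the xor/or pattern
    // in the new CHECK lines).
    R.Shadow = (T.Val ^ F.Val) | T.Shadow | F.Shadow;
  }
  return R;
}

Extracting only the top bit of the condition and of its shadow is what lets the instrumentation flag a use of an uninitialized condition only when the deciding bit is actually uninitialized, instead of poisoning the whole result whenever any bit of any operand is.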