aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVitaly Buka <vitalybuka@google.com>2024-06-12 15:45:04 -0700
committerGitHub <noreply@github.com>2024-06-12 15:45:04 -0700
commite529cbea3f3333b1835258aeabd479e31a7e09dd (patch)
treeda46fb94678e2db381196c377c19e8e1ba0b95d8
parent1438623eddf002436d2b7f7fc302e502684677cc (diff)
downloadllvm-users/vitalybuka/spr/main.msan-handle-blendv-intrinsics.zip
llvm-users/vitalybuka/spr/main.msan-handle-blendv-intrinsics.tar.gz
llvm-users/vitalybuka/spr/main.msan-handle-blendv-intrinsics.tar.bz2
[msan] Handle blendv intrinsics (#94882)users/vitalybuka/spr/main.msan-handle-blendv-intrinsics
blendvs are very similar to select, so we adjust arguments and forward them into select handler.
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp40
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll44
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll31
-rw-r--r--llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll63
4 files changed, 136 insertions, 42 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 6cd9b68..5552f93 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3356,6 +3356,37 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ Value *convertBlendvToSelectMask(IRBuilder<> &IRB, Value *C) {
+ C = CreateAppToShadowCast(IRB, C);
+ FixedVectorType *FVT = cast<FixedVectorType>(C->getType());
+ unsigned ElSize = FVT->getElementType()->getPrimitiveSizeInBits();
+ C = IRB.CreateAShr(C, ElSize - 1);
+ FVT = FixedVectorType::get(IRB.getInt1Ty(), FVT->getNumElements());
+ return IRB.CreateTrunc(C, FVT);
+ }
+
+ // `blendv(f, t, c)` is effectively `select(c[top_bit], t, f)`.
+ void handleBlendvIntrinsic(IntrinsicInst &I) {
+ Value *C = I.getOperand(2);
+ Value *T = I.getOperand(1);
+ Value *F = I.getOperand(0);
+
+ Value *Sc = getShadow(&I, 2);
+ Value *Oc = MS.TrackOrigins ? getOrigin(C) : nullptr;
+
+ {
+ IRBuilder<> IRB(&I);
+ // Extract top bit from condition and its shadow.
+ C = convertBlendvToSelectMask(IRB, C);
+ Sc = convertBlendvToSelectMask(IRB, Sc);
+
+ setShadow(C, Sc);
+ setOrigin(C, Oc);
+ }
+
+ handleSelectLikeInst(I, C, T, F);
+ }
+
// Instrument sum-of-absolute-differences intrinsic.
void handleVectorSadIntrinsic(IntrinsicInst &I) {
const unsigned SignificantBitsPerResultElement = 16;
@@ -4027,6 +4058,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorPackIntrinsic(I);
break;
+ case Intrinsic::x86_sse41_pblendvb:
+ case Intrinsic::x86_sse41_blendvpd:
+ case Intrinsic::x86_sse41_blendvps:
+ case Intrinsic::x86_avx_blendv_pd_256:
+ case Intrinsic::x86_avx_blendv_ps_256:
+ case Intrinsic::x86_avx2_pblendvb:
+ handleBlendvIntrinsic(I);
+ break;
+
case Intrinsic::x86_avx_dp_ps_256:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index d031e04..3d5e400 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -38,14 +38,24 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_blendv_pd_256(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]], <4 x double> [[A2:%.*]])
-; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP12]], [[TMP2]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i64> [[TMP13]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i64> [[TMP14]], <4 x i64> [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> [[A0]], <4 x double> [[A1]], <4 x double> [[A2]])
+; CHECK-NEXT: store <4 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
%res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
@@ -56,14 +66,24 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4
define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_blendv_ps_256(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], <8 x float> [[A2:%.*]])
-; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i32> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i32> [[TMP12]], [[TMP2]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i32> [[TMP13]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[TMP14]], <8 x i32> [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A0]], <8 x float> [[A1]], <8 x float> [[A2]])
+; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
%res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index 2759ca7..5efb7eb 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -816,8 +816,8 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
@@ -828,10 +828,10 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[_MSLD]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP8]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP8]], 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
@@ -881,14 +881,21 @@ define <16 x i16> @test_x86_avx2_packusdw_fold() #0 {
define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) #0 {
; CHECK-LABEL: @test_x86_avx2_pblendvb(
-; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i8> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], <32 x i8> [[A2:%.*]])
-; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], [[TMP2]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i8> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> [[TMP11]], <32 x i8> [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0]], <32 x i8> [[A1]], <32 x i8> [[A2]])
+; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[RES]]
;
%res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll
index e6775cf..a714558 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll
@@ -6,14 +6,24 @@ target triple = "x86_64-unknown-linux-gnu"
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: @test_x86_sse41_blendvpd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]])
-; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], <i64 63, i64 63>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], <i64 63, i64 63>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i64> [[TMP12]], [[TMP2]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <2 x i1> [[TMP8]], <2 x i64> [[TMP14]], <2 x i64> [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]])
+; CHECK-NEXT: store <2 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
@@ -24,14 +34,24 @@ declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x d
define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: @test_x86_sse41_blendvps(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]])
-; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i32> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i32> [[TMP12]], [[TMP2]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP14]], <4 x i32> [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]])
+; CHECK-NEXT: store <4 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
@@ -202,14 +222,21 @@ define <8 x i16> @test_x86_sse41_packusdw_fold() #0 {
define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) #0 {
; CHECK-LABEL: @test_x86_sse41_pblendvb(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], <16 x i8> [[A2:%.*]])
-; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], [[TMP2]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i8> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0]], <16 x i8> [[A1]], <16 x i8> [[A2]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[RES]]
;
%res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]