diff options
author | Florian Mayer <fmayer@google.com> | 2025-07-07 11:24:17 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-07 11:24:17 -0700 |
commit | 0032148ea642dfb2f17b36201e82fee454fa6ebe (patch) | |
tree | 6df31ce7e056bfbd2f7bd2b978e395b6320bfdf1 | |
parent | 778f60d92d30d7327dc426e3c1a94d9aae93987e (diff) | |
download | llvm-0032148ea642dfb2f17b36201e82fee454fa6ebe.zip llvm-0032148ea642dfb2f17b36201e82fee454fa6ebe.tar.gz llvm-0032148ea642dfb2f17b36201e82fee454fa6ebe.tar.bz2 |
[MSAN] handle permi2var (#146437)
5 files changed, 2852 insertions, 536 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 82bafa3..ec94dca 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -158,6 +158,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/bit.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -4272,6 +4273,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, PtrSrcOrigin);
   }
 
+  // Check that the bits of a permute's index vector Idx which take part in
+  // lane selection are initialized. With a power-of-two lane count N, those
+  // are taken to be the low log2(N) bits of each element; poison in the
+  // remaining bits is ignored. The shadow is masked as a whole vector so
+  // that every element is covered by the check, not only the last one.
+  // NOTE(review): two-source permutes such as vpermi2var presumably also
+  // consume the next bit to pick the source operand; confirm mask width.
+  void maskedCheckAVXIndexShadow(IRBuilder<> &IRB, Value *Idx, Instruction *I) {
+    auto *IdxTy = cast<FixedVectorType>(Idx->getType());
+    unsigned NumLanes = IdxTy->getNumElements();
+    assert(isPowerOf2_64(NumLanes));
+
+    // For a vector type, ConstantInt::get yields a splat of the value.
+    Constant *IndexBits = ConstantInt::get(IdxTy, NumLanes - 1);
+    // AND (not OR) so that only the selector bits of the shadow survive.
+    Value *UsedShadow = IRB.CreateAnd(getShadow(Idx), IndexBits);
+    insertShadowCheck(UsedShadow, getOrigin(Idx), I);
+  }
+
   // Instrument AVX permutation intrinsic.
   // We apply the same permutation (argument index 1) to the shadow.
   void handleAVXVpermilvar(IntrinsicInst &I) {
@@ -4289,6 +4309,39 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  // Instrument AVX permutation intrinsic.
+  // We apply the same permutation (argument index 1) to the shadows.
+  void handleAVXVpermi2var(IntrinsicInst &I) {
+    assert(I.arg_size() == 3);
+    Value *A = I.getArgOperand(0);
+    Value *Idx = I.getArgOperand(1);
+    Value *B = I.getArgOperand(2);
+    // Operand sanity: three fixed vectors with equal lane counts, where A, B
+    // and the result share a single type and the index vector is integral.
+    assert(isa<FixedVectorType>(A->getType()));
+    assert(isa<FixedVectorType>(Idx->getType()));
+    assert(isa<FixedVectorType>(B->getType()));
+    [[maybe_unused]] auto NumLanes =
+        cast<FixedVectorType>(A->getType())->getNumElements();
+    assert(cast<FixedVectorType>(Idx->getType())->getNumElements() == NumLanes);
+    assert(cast<FixedVectorType>(B->getType())->getNumElements() == NumLanes);
+    assert(A->getType() == B->getType());
+    assert(I.getType() == A->getType());
+    assert(Idx->getType()->isIntOrIntVectorTy());
+    // Fetch both data-operand shadows first, then check the index operand.
+    IRBuilder<> IRB(&I);
+    Value *AShadow = getShadow(&I, 0);
+    Value *BShadow = getShadow(&I, 2);
+    maskedCheckAVXIndexShadow(IRB, Idx, &I);
+    // Shadows are integer-ish; cast to the operand type the intrinsic needs.
+    AShadow = IRB.CreateBitCast(AShadow, A->getType());
+    BShadow = IRB.CreateBitCast(BShadow, B->getType());
+    CallInst *ShadowPerm = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                               {AShadow, Idx, BShadow});
+    setShadow(&I, IRB.CreateBitCast(ShadowPerm, getShadowTy(&I)));
+    setOriginForNaryOp(I);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
// All of these intrinsics are Z = I(X, Y) // where the types of all operands and the result match, and are either i32 or @@ -5244,6 +5297,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { break; } + case Intrinsic::x86_avx512_vpermi2var_d_128: + case Intrinsic::x86_avx512_vpermi2var_d_256: + case Intrinsic::x86_avx512_vpermi2var_d_512: + case Intrinsic::x86_avx512_vpermi2var_hi_128: + case Intrinsic::x86_avx512_vpermi2var_hi_256: + case Intrinsic::x86_avx512_vpermi2var_hi_512: + case Intrinsic::x86_avx512_vpermi2var_pd_128: + case Intrinsic::x86_avx512_vpermi2var_pd_256: + case Intrinsic::x86_avx512_vpermi2var_pd_512: + case Intrinsic::x86_avx512_vpermi2var_ps_128: + case Intrinsic::x86_avx512_vpermi2var_ps_256: + case Intrinsic::x86_avx512_vpermi2var_ps_512: + case Intrinsic::x86_avx512_vpermi2var_q_128: + case Intrinsic::x86_avx512_vpermi2var_q_256: + case Intrinsic::x86_avx512_vpermi2var_q_512: + case Intrinsic::x86_avx512_vpermi2var_qi_128: + case Intrinsic::x86_avx512_vpermi2var_qi_256: + case Intrinsic::x86_avx512_vpermi2var_qi_512: + handleAVXVpermi2var(I); + break; + case Intrinsic::x86_avx512fp16_mask_add_sh_round: case Intrinsic::x86_avx512fp16_mask_sub_sh_round: case Intrinsic::x86_avx512fp16_mask_mul_sh_round: diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll index 5aeaa12..2c9c6c7 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll @@ -13700,8 +13700,8 @@ define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32 ; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_d_512( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 
8 -; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP14:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] @@ -13714,9 +13714,62 @@ define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X4:%.*]]) +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP42]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP14]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP45]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP45]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP14]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> 
[[TMP14]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP14]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP14]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP14]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP14]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP14]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP14]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP14]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP14]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP14]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP14]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]] +; 
CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP14]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[TMP4]]) +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]] +; CHECK: 59: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 60: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X4:%.*]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[TMP10]] ; @@ -13744,9 +13797,62 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]] 
+; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: 
[[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]] +; CHECK: 59: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 60: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]] @@ -13768,25 +13874,46 @@ declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 
[[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP6]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP6]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP23]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP23]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i64 
[[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double> +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP4]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP36]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]] +; CHECK: 32: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 33: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double> -; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x double> [[TMP9]] ; %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) @@ -13797,32 +13924,53 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, ; ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP7]], 7 +; CHECK-NEXT: [[TMP38:%.*]] = or i64 [[TMP7]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP24]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP24]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP28]], [[TMP29]] +; CHECK-NEXT: 
[[TMP30:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP31:%.*]] = and i64 [[TMP30]], 7 +; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 7 +; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 7 +; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP5]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP43]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP44:%.*]], label [[TMP45:%.*]], !prof [[PROF1]] +; CHECK: 33: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 34: +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> zeroinitializer, <8 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP8]], <8 x i64> [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x double> [[TMP10]] to <8 x i64> ; CHECK-NEXT: 
[[TMP16:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = xor <8 x i64> [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP8]] ; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP2]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP19]], <8 x i64> [[TMP14]] ; CHECK-NEXT: [[TMP20:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP10]], <8 x double> [[TMP11]] @@ -13838,25 +13986,70 @@ declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x 
float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP6]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP6]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP39]], [[TMP42]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9 +; CHECK-NEXT: 
[[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP8]], i64 10 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float> +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP4]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0 +; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]] +; CHECK: 56: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 57: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> 
[[X2:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float> -; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x float> [[TMP9]] ; %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) @@ -13867,32 +14060,77 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, ; ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> 
@llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP40]], [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP52]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = 
extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP69:%.*]] = or i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP69]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label [[TMP58:%.*]], !prof [[PROF1]] +; CHECK: 57: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 58: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> [[X2:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 
[[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> zeroinitializer, <16 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP8]], <16 x i32> [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], [[TMP8]] ; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP18]], [[TMP2]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP19]], <16 x i32> [[TMP14]] ; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP13]], <16 x float> [[TMP10]], <16 x float> [[TMP11]] @@ -13908,12 +14146,41 @@ declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, < define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), 
align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP20]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP23]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP23]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]] +; CHECK: 29: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 30: +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 
x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]]) ; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i64> [[TMP4]] ; @@ -13925,13 +14192,42 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i ; ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i64 
[[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP13]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP38:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP2]] @@ -13968,9 +14264,62 @@ define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] -; CHECK-NEXT: [[TMP10:%.*]] = call <16 
x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = 
extractelement <16 x i32> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X0:%.*]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]] +; CHECK: 59: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 60: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer @@ -13999,7 +14348,7 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, < ; 
CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable @@ -14013,26 +14362,47 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, < ; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> [[EXTRA_PARAM:%.*]], double [[X2S]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> [[TMP5]], <8 x i32> zeroinitializer ; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> [[EXTRA_PARAM]], <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP14:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] -; CHECK: 14: +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP27]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP27]], [[TMP12]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 7 +; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i64> [[TMP3]], i64 2 +; 
CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP30]], 7 +; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP30]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7 +; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 7 +; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP38]], 7 +; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7 +; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to <8 x double> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <8 x double> +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP11]], <8 x i64> [[X0:%.*]], <8 x double> [[TMP24]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[TMP13]] to <8 x i64> +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP47]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP48:%.*]], label [[TMP49:%.*]], !prof [[PROF1]] +; CHECK: 39: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 15: -; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x double> [[X2]]) +; CHECK: 40: +; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0]], <8 x double> [[X2]]) ; CHECK-NEXT: 
[[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP14]], <8 x i64> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x double> [[TMP15]] to <8 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP14]] ; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP22]], <8 x i64> [[TMP18]] ; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP17]], <8 x double> [[TMP15]], <8 x double> zeroinitializer @@ -14052,30 +14422,75 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, ; ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: 
[[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP40]], [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP52]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP52]], 
[[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float> +; CHECK-NEXT: [[TMP19:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X0:%.*]], <16 x float> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP19]] to <16 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP68]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label 
[[TMP58:%.*]], !prof [[PROF1]] +; CHECK: 57: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 58: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0]], <16 x float> [[X2:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP8]] ; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP17]], <16 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP12]], <16 x float> [[TMP10]], <16 x float> zeroinitializer @@ -14093,13 +14508,42 @@ define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x ; ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void 
@llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP13]], i64 3 +; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i64> [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP31]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP31]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP13]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP13]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP13]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X0:%.*]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0 +; CHECK-NEXT: br i1 
[[_MSCMP]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0]], <8 x i64> [[X2:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer @@ -14120,12 +14564,65 @@ declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32> define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP36]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 15 +; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP6]], [[TMP7]] +; 
CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP39]], 15 +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP8]], i64 10 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: 
[[TMP42:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0 +; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP53:%.*]], label [[TMP54:%.*]], !prof [[PROF1]] +; CHECK: 53: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 54: +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[TMP4]] ; @@ -14137,13 +14634,66 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 ; ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 
192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]]) +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP37]], [[TMP40]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15 +; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP13]], i64 3 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP45]], [[TMP46]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP47]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP47]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP13]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP13]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP13]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or 
i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP13]], i64 9 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP13]], i64 10 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP13]], i64 11 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP13]], i64 12 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP13]], i64 13 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP13]], i64 14 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP13]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP35]], [[TMP51]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP36]], 0 +; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]] +; CHECK: 54: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 55: +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast 
i16 [[X3:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll index 1644a5e..43595dc 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll @@ -5467,9 +5467,62 @@ define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP41]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP41]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP13]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = 
extractelement <16 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP39]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP39]], 
[[TMP40]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP56]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP58:%.*]], label [[TMP59:%.*]], !prof [[PROF1]] +; CHECK: 58: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 59: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[TMP9]] ; @@ -5496,9 +5549,62 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], 
[[TMP51]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; 
CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]] +; CHECK: 59: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 60: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]] @@ -5522,24 +5628,45 @@ declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 
[[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP6]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP6]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP23]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP23]], [[TMP26]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP10]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP10]], [[TMP27]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x 
i64> [[TMP1]] to <8 x double> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double> +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP4]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP35]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]] +; CHECK: 32: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]]) -; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK: 33: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]]) +; CHECK-NEXT: store <8 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x double> [[TMP9]] ; %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) @@ -5549,32 +5676,53 @@ define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 
8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP7]], 7 +; CHECK-NEXT: [[TMP38:%.*]] = or i64 [[TMP7]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP24]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP24]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP31:%.*]] = and i64 [[TMP30]], 7 +; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> 
[[TMP2]], i64 5 +; CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 7 +; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 7 +; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP5]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP43]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP44:%.*]], label [[TMP45:%.*]], !prof [[PROF1]] +; CHECK: 33: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 34: +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> zeroinitializer, <8 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP8]], <8 x i64> [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x double> [[TMP10]] to <8 x i64> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = xor <8 x i64> [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or 
<8 x i64> [[TMP17]], [[TMP8]] ; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP2]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP19]], <8 x i64> [[TMP14]] ; CHECK-NEXT: [[TMP20:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP10]], <8 x double> [[TMP11]] @@ -5593,24 +5741,69 @@ declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32> define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]]) -; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint 
(ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP6]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP6]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP39]], [[TMP42]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP10]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP10]], [[TMP43]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x 
i32> [[TMP8]], i64 10 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float> +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP4]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0 +; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]] +; CHECK: 56: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 57: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: store <16 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x float> [[TMP9]] ; %1 = call <16 x 
float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) @@ -5620,32 +5813,77 @@ define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; 
CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP40]], [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP52]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = 
extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP69:%.*]] = or i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP69]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label [[TMP58:%.*]], !prof [[PROF1]] +; CHECK: 57: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 58: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> [[X2:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> zeroinitializer, <16 x i32> 
[[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP8]], <16 x i32> [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], [[TMP8]] ; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP18]], [[TMP2]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP19]], <16 x i32> [[TMP14]] ; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP13]], <16 x float> [[TMP10]], <16 x float> [[TMP11]] @@ -5664,12 +5902,41 @@ declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP20]], 7 +; CHECK-NEXT: 
[[TMP22:%.*]] = or i64 [[TMP20]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP23]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP23]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]] +; CHECK: 29: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 30: +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]]) ; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i64> [[TMP4]] ; @@ -5680,13 
+5947,42 @@ define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> % define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x 
i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP13]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP38:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP2]] @@ -5722,9 +6018,62 @@ define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x 
i32> [[X0:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 
[[TMP30]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X0:%.*]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]] +; CHECK: 59: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 60: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer @@ -5753,7 +6102,7 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, < ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 136) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable @@ -5767,26 +6116,47 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, < ; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> [[EXTRA_PARAM:%.*]], double [[X2S]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> [[TMP6]], <8 x i32> zeroinitializer ; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> [[EXTRA_PARAM2:%.*]], <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] -; CHECK: 15: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 16: -; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x double> [[X2]]) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP12]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP3]], i64 1 
+; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7 +; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i64> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP34:%.*]] = and i64 [[TMP31]], 7 +; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP31]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP36:%.*]] = and i64 [[TMP35]], 7 +; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP37]], 7 +; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP40:%.*]] = and i64 [[TMP39]], 7 +; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i64> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP42:%.*]] = and i64 [[TMP41]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 7 +; CHECK-NEXT: [[TMP48:%.*]] = or i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP2]] to <8 x double> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <8 x double> +; CHECK-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP24]], <8 x i64> [[X0:%.*]], <8 x double> [[TMP13]]) +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <8 x double> [[TMP14]] to <8 x i64> +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP48]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP49:%.*]], label [[TMP50:%.*]], !prof [[PROF1]] +; CHECK: 40: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 41: +; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0]], <8 x double> 
[[X2]]) ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP25]], <8 x i64> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x double> [[TMP15]] to <8 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP25]] ; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP22]], <8 x i64> [[TMP18]] ; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP17]], <8 x double> [[TMP15]], <8 x double> zeroinitializer @@ -5805,30 +6175,75 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, < define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 { ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> 
[[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 9: -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP40]], [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP50]], [[TMP51]] +; CHECK-NEXT: 
[[TMP52:%.*]] = extractelement <16 x i32> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP52]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP52]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float> +; CHECK-NEXT: [[TMP19:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X0:%.*]], <16 x float> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 
x float> [[TMP19]] to <16 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP68]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label [[TMP58:%.*]], !prof [[PROF1]] +; CHECK: 57: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 58: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0]], <16 x float> [[X2:%.*]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP8]] ; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP17]], <16 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP12]], <16 x float> [[TMP10]], <16 x float> zeroinitializer @@ -5844,13 +6259,42 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) 
to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP13]], i64 3 +; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i64> [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP31]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP31]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP13]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP13]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP13]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i64 
[[TMP19]], [[TMP20]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X0:%.*]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0]], <8 x i64> [[X2:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer @@ -5871,12 +6315,65 @@ define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 -; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP36]], 15 
+; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP36]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 15 +; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP39]], 15 +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP8]], i64 10 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11 +; CHECK-NEXT: 
[[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0 +; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP53:%.*]], label [[TMP54:%.*]], !prof [[PROF1]] +; CHECK: 53: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 54: +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[TMP4]] ; @@ -5887,13 +6384,66 @@ define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512( ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr 
@__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]]) +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP37]], [[TMP40]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15 +; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP13]], i64 3 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP45]], [[TMP46]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP47]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP47]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP13]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP13]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP17]], 
[[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP13]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP13]], i64 9 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP13]], i64 10 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP13]], i64 11 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP13]], i64 12 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP13]], i64 13 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP13]], i64 14 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP13]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP35]], [[TMP51]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP36]], 0 +; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]] +; 
CHECK: 54: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 55: +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll index 14d68b4..7d45cec 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll @@ -1901,11 +1901,28 @@ define <4 x i32>@test_int_x86_avx512_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> % ; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_vpermi2var_d_128( ; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> 
[[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP14]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP6]], <4 x i32> [[X1]], <4 x i32> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB17]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB18]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -1919,12 +1936,29 @@ define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i ; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_mask_vpermi2var_d_128( ; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint 
(ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[_MSPROP1]], [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP13]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP9]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP8]], <4 x i32> [[X1]], <4 x i32> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]] +; CHECK: [[BB18]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB19]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -1950,11 +1984,28 @@ define <4 x i32>@test_int_x86_avx512_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> % ; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_vpermt2var_d_128( ; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls 
to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP14]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP6]], <4 x i32> [[X0]], <4 x i32> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB17]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB18]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X1]], <4 x i32> [[X0]], <4 x i32> [[X2]]) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] @@ -1968,12 +2019,29 @@ define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i ; CHECK-LABEL: 
define <4 x i32> @test_int_x86_avx512_mask_vpermt2var_d_128( ; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[_MSPROP1]], [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP13]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP9]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP8]], <4 x i32> [[X0]], <4 x i32> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]] +; CHECK: [[BB18]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] 
+; CHECK-NEXT: unreachable +; CHECK: [[BB19]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X1]], <4 x i32> [[X0]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2000,12 +2068,29 @@ define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x ; CHECK-LABEL: define <4 x i32> @test_int_x86_avx512_maskz_vpermt2var_d_128( ; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i32> [[_MSPROP1]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 3 
+; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP8]], <4 x i32> [[X0]], <4 x i32> [[TMP9]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]] +; CHECK: [[BB18]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB19]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X1]], <4 x i32> [[X0]], <4 x i32> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2033,11 +2118,40 @@ define <8 x i32>@test_int_x86_avx512_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> % ; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_vpermi2var_d_256( ; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP22]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or 
i32 [[TMP22]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP6]], <8 x i32> [[X1]], <8 x i32> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB29:.*]], label %[[BB30:.*]], !prof [[PROF1]] +; CHECK: [[BB29]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB30]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] @@ -2051,12 +2165,41 @@ define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i ; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_mask_vpermi2var_d_256( ; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> 
[[X1:%.*]], <8 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP6]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP9]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP9]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP13]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP13]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP29]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP29]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement 
<8 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP8]], <8 x i32> [[X1]], <8 x i32> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB31]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2079,11 +2222,40 @@ define <8 x i32>@test_int_x86_avx512_ask_vpermt2var_d_256(<8 x i32> %x0, <8 x i3 ; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_ask_vpermt2var_d_256( ; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7 +; 
CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP22]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP22]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP6]], <8 x i32> [[X0]], <8 x i32> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB29:.*]], label %[[BB30:.*]], !prof [[PROF1]] +; CHECK: [[BB29]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB30]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X1]], <8 x i32> [[X0]], <8 x i32> [[X2]]) ; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] @@ -2097,12 
+2269,41 @@ define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i ; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_mask_vpermt2var_d_256( ; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP6]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP9]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP9]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP13]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP13]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5 +; 
CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP29]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP29]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP8]], <8 x i32> [[X0]], <8 x i32> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB31]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X1]], <8 x i32> [[X0]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2126,12 +2327,41 @@ define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x ; CHECK-LABEL: define <8 x i32> @test_int_x86_avx512_maskz_vpermt2var_d_256( ; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; 
CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP9]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP29]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP29]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP8]], <8 x i32> [[X0]], <8 x i32> [[TMP9]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[BB30]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() 
#[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB31]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X1]], <8 x i32> [[X0]], <8 x i32> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2156,24 +2386,27 @@ define <2 x double>@test_int_x86_avx512_vpermi2var_pd_128(<2 x double> %x0, <2 x ; CHECK-LABEL: define <2 x double> @test_int_x86_avx512_vpermi2var_pd_128( ; CHECK-SAME: <2 x double> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x double> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP9]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: 
[[TMP13:%.*]] = and i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP6]], [[TMP13]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP9]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <2 x double> +; CHECK-NEXT: [[TMP10:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> [[TMP8]], <2 x i64> [[X1]], <2 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x double> [[TMP10]] to <2 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] +; CHECK: [[BB14]]: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK: [[BB15]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> [[X0]], <2 x i64> [[X1]], <2 x double> [[X2]]) -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) @@ -2185,34 +2418,37 @@ define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, ; CHECK-LABEL: define <2 x double> @test_int_x86_avx512_mask_vpermi2var_pd_128( ; CHECK-SAME: <2 x double> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x double> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; 
CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP11]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP13]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP8]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] -; CHECK: [[BB8]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB9]]: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP11]] to <2 x double> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP8]] to <2 x double> +; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> [[TMP9]], <2 x i64> [[X1]], <2 x double> [[TMP12]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <2 x double> [[TMP17]] to <2 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]] +; CHECK: [[BB15]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB16]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> 
[[X0]], <2 x i64> [[X1]], <2 x double> [[X2]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[X1]] to <2 x double> ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[X3]] to <8 x i1> ; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP14]], <8 x i1> [[TMP14]], <2 x i32> <i32 0, i32 1> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP3]], <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[EXTRACT]], <2 x i64> zeroinitializer, <2 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[EXTRACT]], <2 x i64> [[TMP18]], <2 x i64> [[TMP13]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x double> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i64> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i64> [[TMP7]], [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP13]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <2 x i1> [[_MSPROP]], <2 x i64> [[TMP21]], <2 x i64> [[TMP16]] ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[EXTRACT]], <2 x double> [[TMP1]], <2 x double> [[TMP2]] @@ -2233,24 +2469,33 @@ define <4 x double>@test_int_x86_avx512_vpermi2var_pd_256(<4 x double> %x0, <4 x ; CHECK-LABEL: define <4 x double> @test_int_x86_avx512_vpermi2var_pd_256( ; CHECK-SAME: <4 x double> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x double> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint 
(ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP9]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP8]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP6]], 3 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP6]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP17]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP17]], [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP9]] to <4 x double> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP4]] to <4 x double> +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[TMP8]], <4 x i64> [[X1]], <4 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x double> [[TMP10]] to <4 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label 
%[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] +; CHECK: [[BB20]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB21]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[X0]], <4 x i64> [[X1]], <4 x double> [[X2]]) -; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %1 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) @@ -2262,34 +2507,43 @@ define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, ; CHECK-LABEL: define <4 x double> @test_int_x86_avx512_mask_vpermi2var_pd_256( ; CHECK-SAME: <4 x double> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x double> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i64> [[TMP11]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP9]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64> [[TMP13]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i64> [[TMP8]] to i256 -; CHECK-NEXT: [[_MSCMP2:%.*]] 
= icmp ne i256 [[TMP15]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] -; CHECK: [[BB8]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB9]]: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP15]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 3 +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP13]], i64 3 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i64> [[TMP11]] to <4 x double> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64> [[TMP8]] to <4 x double> +; CHECK-NEXT: [[TMP17:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[TMP9]], <4 x i64> [[X1]], <4 x double> [[TMP12]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x double> [[TMP17]] to <4 x i64> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP31]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] +; CHECK: [[BB21]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB22]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[X0]], <4 x i64> [[X1]], <4 x double> [[X2]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[X1]] to <4 x double> ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast 
i8 [[X3]] to <8 x i1> ; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP14]], <8 x i1> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> zeroinitializer, <4 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP18]], <4 x i64> [[TMP13]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x double> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[TMP2]] to <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i64> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i64> [[TMP7]], [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i64> [[TMP20]], [[TMP13]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP]], <4 x i64> [[TMP21]], <4 x i64> [[TMP16]] ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[TMP2]] @@ -2310,24 +2564,33 @@ define <4 x float>@test_int_x86_avx512_vpermi2var_ps_128(<4 x float> %x0, <4 x i ; CHECK-LABEL: define <4 x float> @test_int_x86_avx512_vpermi2var_ps_128( ; CHECK-SAME: <4 x float> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x float> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP9]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] 
= icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP6]], 3 +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP6]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP17]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP17]], [[TMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP9]] to <4 x float> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[TMP8]], <4 x i32> [[X1]], <4 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[TMP10]] to <4 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]] +; CHECK: [[BB20]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB21]]: ; CHECK-NEXT: 
[[TMP1:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[X0]], <4 x i32> [[X1]], <4 x float> [[X2]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) @@ -2339,34 +2602,43 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, < ; CHECK-LABEL: define <4 x float> @test_int_x86_avx512_mask_vpermi2var_ps_128( ; CHECK-SAME: <4 x float> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x float> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP11]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP13]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP8]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP15]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] -; CHECK: [[BB8]]: -; 
CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB9]]: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 3 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP13]], i64 3 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP11]] to <4 x float> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float> +; CHECK-NEXT: [[TMP17:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[TMP9]], <4 x i32> [[X1]], <4 x float> [[TMP12]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x float> [[TMP17]] to <4 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP31]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] +; CHECK: [[BB21]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB22]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[X0]], <4 x i32> [[X1]], <4 x float> [[X2]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[X1]] to <4 x float> ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[X3]] to <8 x i1> ; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP14]], <8 x i1> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> 
[[TMP3]], <8 x i1> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[TMP18]], <4 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP7]], [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP13]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP]], <4 x i32> [[TMP21]], <4 x i32> [[TMP16]] ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[EXTRACT]], <4 x float> [[TMP1]], <4 x float> [[TMP2]] @@ -2392,30 +2664,39 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> % ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP11]] to <4 x i32> ; CHECK-NEXT: [[X1CAST:%.*]] = bitcast <2 x i64> [[X1]] to <4 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP12]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP19]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP14]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP13]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] -; CHECK: [[BB9]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB10]]: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 
[[TMP20]], 3 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP20]], [[TMP23]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP8]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP14]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP14]], i64 3 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP12]] to <4 x float> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x i32> [[TMP13]] to <4 x float> +; CHECK-NEXT: [[TMP19:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[TMP16]], <4 x i32> [[X1CAST]], <4 x float> [[TMP18]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x float> [[TMP19]] to <4 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP32]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]] +; CHECK: [[BB22]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB23]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[X0]], <4 x i32> [[X1CAST]], <4 x float> [[X2]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[X1CAST]] to <4 x float> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[X3]] to <8 x i1> ; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP15]], <8 x i1> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = select 
<4 x i1> [[EXTRACT]], <4 x i32> [[TMP9]], <4 x i32> [[TMP14]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i32> [[TMP21]], [[TMP14]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP]], <4 x i32> [[TMP22]], <4 x i32> [[TMP17]] ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[EXTRACT]], <4 x float> [[TMP1]], <4 x float> [[TMP2]] @@ -2437,24 +2718,45 @@ define <8 x float>@test_int_x86_avx512_vpermi2var_ps_256(<8 x float> %x0, <8 x i ; CHECK-LABEL: define <8 x float> @test_int_x86_avx512_vpermi2var_ps_256( ; CHECK-SAME: <8 x float> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x float> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP9]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP8]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 
[[_MSOR3]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]] -; CHECK: [[BB7]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB8]]: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP6]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP6]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP25]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP25]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP9]] to <8 x float> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP4]] to <8 x float> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[TMP8]], <8 x i32> [[X1]], <8 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x float> 
[[TMP10]] to <8 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB32:.*]], label %[[BB33:.*]], !prof [[PROF1]] +; CHECK: [[BB32]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB33]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[X0]], <8 x i32> [[X1]], <8 x float> [[X2]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %1 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) @@ -2466,32 +2768,53 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, < ; CHECK-LABEL: define <8 x float> @test_int_x86_avx512_mask_vpermi2var_ps_256( ; CHECK-SAME: <8 x float> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x float> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP11:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP11]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP9]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i32> [[TMP13]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP12]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], 
[[_MSCMP1]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i32> [[TMP8]] to i256 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP15]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] -; CHECK: [[BB8]]: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: [[BB9]]: +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 7 +; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP13]], i64 3 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP13]], i64 5 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP15]], 7 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP15]], [[TMP32]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP13]], i64 6 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 7 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP13]], i64 7 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP19]], 7 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP19]], [[TMP35]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP11]] to <8 x float> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 
x i32> [[TMP8]] to <8 x float> +; CHECK-NEXT: [[TMP17:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[TMP9]], <8 x i32> [[X1]], <8 x float> [[TMP12]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x float> [[TMP17]] to <8 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP43]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB33:.*]], label %[[BB34:.*]], !prof [[PROF1]] +; CHECK: [[BB33]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB34]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[X0]], <8 x i32> [[X1]], <8 x float> [[X2]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[X1]] to <8 x float> ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[X3]] to <8 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP3]], <8 x i32> zeroinitializer, <8 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP3]], <8 x i32> [[TMP18]], <8 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x float> [[TMP1]] to <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x float> [[TMP2]] to <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i32> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i32> [[TMP7]], [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i32> [[TMP20]], [[TMP13]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP14]], <8 x i32> [[TMP21]], <8 x i32> [[TMP16]] ; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x float> [[TMP1]], <8 x float> [[TMP2]] @@ -2511,11 +2834,22 @@ define <2 x i64>@test_int_x86_avx512_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> % ; CHECK-LABEL: define <2 x i64> @test_int_x86_avx512_vpermi2var_q_128( ; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr 
@__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[X1]], <2 x i64> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB12]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[X1]], <2 x i64> [[X2]]) ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] @@ -2529,12 +2863,23 @@ define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i ; CHECK-LABEL: define <2 x i64> @test_int_x86_avx512_mask_vpermi2var_q_128( ; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 -; 
CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[_MSPROP1]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP9]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP8]], <2 x i64> [[X1]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]] +; CHECK: [[BB12]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB13]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[X1]], <2 x i64> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2560,11 +2905,22 @@ define <2 x i64>@test_int_x86_avx512_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> % ; CHECK-LABEL: define <2 x i64> @test_int_x86_avx512_vpermt2var_q_128( ; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> 
[[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[X0]], <2 x i64> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB12]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X1]], <2 x i64> [[X0]], <2 x i64> [[X2]]) ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] @@ -2578,12 +2934,23 @@ define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i ; CHECK-LABEL: define <2 x i64> @test_int_x86_avx512_mask_vpermt2var_q_128( ; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add 
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[_MSPROP1]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP9]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP8]], <2 x i64> [[X0]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]] +; CHECK: [[BB12]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB13]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X1]], <2 x i64> [[X0]], <2 x i64> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2610,12 +2977,23 @@ define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x ; CHECK-LABEL: define <2 x i64> @test_int_x86_avx512_maskz_vpermt2var_q_128( ; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; 
CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i64> [[_MSPROP1]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP13:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP8]], <2 x i64> [[X0]], <2 x i64> [[TMP9]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]] +; CHECK: [[BB12]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB13]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X1]], <2 x i64> [[X0]], <2 x i64> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2643,11 +3021,28 @@ define <4 x i64>@test_int_x86_avx512_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> % ; CHECK-LABEL: define <4 x i64> @test_int_x86_avx512_vpermi2var_q_256( ; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> 
[[X1:%.*]], <4 x i64> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP14]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP6]], <4 x i64> [[X1]], <4 x i64> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB17]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB18]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[X1]], <4 x i64> [[X2]]) ; CHECK-NEXT: 
store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] @@ -2661,12 +3056,29 @@ define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i ; CHECK-LABEL: define <4 x i64> @test_int_x86_avx512_mask_vpermi2var_q_256( ; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i64> [[_MSPROP1]], [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP13]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP9]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP9]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 3 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> 
[[TMP8]], <4 x i64> [[X1]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]] +; CHECK: [[BB18]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB19]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[X1]], <4 x i64> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2692,11 +3104,28 @@ define <4 x i64>@test_int_x86_avx512_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> % ; CHECK-LABEL: define <4 x i64> @test_int_x86_avx512_vpermt2var_q_256( ; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP14]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP8]], [[TMP9]] +; 
CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP6]], <4 x i64> [[X0]], <4 x i64> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB17]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB18]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X1]], <4 x i64> [[X0]], <4 x i64> [[X2]]) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] @@ -2710,12 +3139,29 @@ define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i ; CHECK-LABEL: define <4 x i64> @test_int_x86_avx512_mask_vpermt2var_q_256( ; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i64> [[_MSPROP1]], [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP13]], 3 +; CHECK-NEXT: 
[[TMP22:%.*]] = or i64 [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP9]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP9]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 3 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP8]], <4 x i64> [[X0]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]] +; CHECK: [[BB18]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB19]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X1]], <4 x i64> [[X0]], <4 x i64> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -2742,12 +3188,29 @@ define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x ; CHECK-LABEL: define <4 x i64> @test_int_x86_avx512_maskz_vpermt2var_q_256( ; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: 
[[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[TMP8]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[_MSPROP1]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 3 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP8]], <4 x i64> [[X0]], <4 x i64> [[TMP9]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]] +; CHECK: [[BB18]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: [[BB19]]: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X1]], <4 x i64> [[X0]], <4 x i64> [[X2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1> @@ -12267,8 +12730,7 @@ define <8 x i32> @combine_vpermi2d_vpermps(<16 x i32> noundef %a) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> 
[[TMP5]], <16 x i32> splat (i32 -1), <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i32> [[_MSPROP]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[_MSPROP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[_MSPROP]], <8 x i32> <i32 14, i32 13, i32 6, i32 3, i32 5, i32 15, i32 0, i32 1>, <8 x i32> [[_MSPROP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP1]], <8 x i32> <i32 14, i32 13, i32 6, i32 3, i32 5, i32 15, i32 0, i32 1>, <8 x i32> [[TMP2]]) ; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[TMP3]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll index 2350d75..95c1bbf 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll @@ -16,8 +16,7 @@ define <2 x i64> @shuffle_vpermv3_v2i64(<2 x i64> %x0, <2 x i64> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP1]], <2 x i64> <i64 2, i64 0>, <2 x i64> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> <i64 2, i64 0>, <2 x i64> [[X1]]) ; CHECK-NEXT: 
store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[R]] @@ -31,8 +30,7 @@ define <2 x i64> @shuffle_vpermv3_v2i64_unary(<2 x i64> %x0) #0 { ; CHECK-SAME: <2 x i64> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP1]], <2 x i64> <i64 2, i64 0>, <2 x i64> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> <i64 2, i64 0>, <2 x i64> [[X0]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[R]] @@ -55,8 +53,19 @@ define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(<2 x i64> %x0, <2 x i64> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]], <i64 0, i64 4> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[T]], <2 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB17]]: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: [[BB18]]: ; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[R]] @@ -80,8 +89,19 @@ define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits_negative(<2 x i64> %x0, <2 ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]], <i64 0, i64 2> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[T]], <2 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB17]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB18]]: ; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]]) ; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[R]] @@ -97,8 +117,7 @@ define <4 x i64> @shuffle_vpermv3_v4i64(<4 x i64> %x0, <4 x i64> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP1]], <4 x i64> <i64 7, i64 2, i64 6, i64 0>, <4 x i64> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> <i64 7, i64 2, i64 6, i64 0>, <4 x i64> [[X1]]) ; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[R]] @@ -112,8 +131,7 @@ define <4 x i64> @shuffle_vpermv3_v4i64_unary(<4 x i64> %x0) #0 { ; CHECK-SAME: <4 x i64> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP1]], <4 x i64> <i64 7, i64 2, i64 6, i64 0>, <4 x i64> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> <i64 7, i64 2, i64 6, i64 0>, <4 x i64> [[X0]]) ; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[R]] @@ -136,8 +154,25 @@ define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(<4 x i64> %x0, <4 x i64> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i64> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <4 x i64> [[M]], <i64 0, i64 8, i64 16, i64 32> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x 
i64> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP6]], <4 x i64> [[T]], <4 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB23:.*]], label %[[BB24:.*]], !prof [[PROF1]] +; CHECK: [[BB23]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB24]]: ; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[T]], <4 x i64> [[X1]]) ; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[R]] @@ -153,8 +188,7 @@ define <8 x i64> @shuffle_vpermv3_v8i64(<8 x i64> %x0, <8 x i64> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> <i64 8, i64 6, i64 10, i64 4, i64 12, i64 2, i64 14, i64 
0>, <8 x i64> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> <i64 8, i64 6, i64 10, i64 4, i64 12, i64 2, i64 14, i64 0>, <8 x i64> [[X1]]) ; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i64> [[R]] @@ -168,8 +202,7 @@ define <8 x i64> @shuffle_vpermv3_v8i64_unary(<8 x i64> %x0) #0 { ; CHECK-SAME: <8 x i64> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> <i64 8, i64 6, i64 10, i64 4, i64 12, i64 2, i64 14, i64 0>, <8 x i64> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> <i64 8, i64 6, i64 10, i64 4, i64 12, i64 2, i64 14, i64 0>, <8 x i64> [[X0]]) ; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i64> [[R]] @@ -192,8 +225,37 @@ define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(<8 x i64> %x0, <8 x i64> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <8 x i64> [[M]], <i64 0, i64 16, i64 32, i64 64, i64 256, i64 512, i64 1024, i64 -16> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: 
[[TMP28:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP6]], <8 x i64> [[T]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP33]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB35:.*]], label %[[BB36:.*]], !prof [[PROF1]] +; CHECK: [[BB35]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB36]]: ; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> [[T]], <8 x i64> [[X1]]) ; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i64> [[R]] @@ -213,8 +275,7 @@ define <4 x i32> @shuffle_vpermv3_v4i32(<4 x i32> %x0, <4 x i32> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = 
load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP1]], <4 x i32> <i32 7, i32 2, i32 6, i32 0>, <4 x i32> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> <i32 7, i32 2, i32 6, i32 0>, <4 x i32> [[X1]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -228,8 +289,7 @@ define <4 x i32> @shuffle_vpermv3_v4i32_unary(<4 x i32> %x0) #0 { ; CHECK-SAME: <4 x i32> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP1]], <4 x i32> <i32 7, i32 2, i32 6, i32 0>, <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> <i32 7, i32 2, i32 6, i32 0>, <4 x i32> [[X0]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -252,8 +312,25 @@ define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(<4 x i32> %x0, <4 x i32> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <4 x i32> [[M]], <i32 0, i32 8, i32 16, i32 32> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = 
extractelement <4 x i32> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP6]], <4 x i32> [[T]], <4 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB23:.*]], label %[[BB24:.*]], !prof [[PROF1]] +; CHECK: [[BB23]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB24]]: ; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[T]], <4 x i32> [[X1]]) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -269,8 +346,7 @@ define <8 x i32> @shuffle_vpermv3_v8i32(<8 x i32> %x0, <8 x i32> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 6, i32 10, i32 4, i32 12, 
i32 2, i32 14, i32 0>, <8 x i32> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> <i32 8, i32 6, i32 10, i32 4, i32 12, i32 2, i32 14, i32 0>, <8 x i32> [[X1]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[R]] @@ -284,8 +360,7 @@ define <8 x i32> @shuffle_vpermv3_v8i32_unary(<8 x i32> %x0) #0 { ; CHECK-SAME: <8 x i32> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 6, i32 10, i32 4, i32 12, i32 2, i32 14, i32 0>, <8 x i32> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> <i32 8, i32 6, i32 10, i32 4, i32 12, i32 2, i32 14, i32 0>, <8 x i32> [[X0]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[R]] @@ -308,8 +383,37 @@ define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(<8 x i32> %x0, <8 x i32> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <8 x i32> [[M]], <i32 0, i32 16, i32 32, i32 64, i32 256, i32 512, i32 -16, i32 -32> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7 +; 
CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP6]], <8 x i32> [[T]], <8 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP33]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB35:.*]], label %[[BB36:.*]], !prof [[PROF1]] +; CHECK: [[BB35]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB36]]: ; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[T]], <8 x i32> [[X1]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[R]] @@ -325,8 +429,7 @@ define <16 x i32> @shuffle_vpermv3_v16i32(<16 x i32> %x0, <16 x i32> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: 
[[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> <i32 16, i32 14, i32 18, i32 12, i32 20, i32 10, i32 22, i32 8, i32 24, i32 6, i32 26, i32 4, i32 28, i32 2, i32 30, i32 0>, <16 x i32> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> <i32 16, i32 14, i32 18, i32 12, i32 20, i32 10, i32 22, i32 8, i32 24, i32 6, i32 26, i32 4, i32 28, i32 2, i32 30, i32 0>, <16 x i32> [[X1]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[R]] @@ -340,8 +443,7 @@ define <16 x i32> @shuffle_vpermv3_v16i32_unary(<16 x i32> %x0) #0 { ; CHECK-SAME: <16 x i32> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> <i32 16, i32 14, i32 18, i32 12, i32 20, i32 10, i32 22, i32 8, i32 24, i32 6, i32 26, i32 4, i32 28, i32 2, i32 30, i32 0>, <16 x i32> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> <i32 16, i32 14, i32 18, i32 12, i32 20, i32 10, i32 22, i32 8, i32 24, i32 6, i32 26, i32 4, i32 28, i32 2, i32 30, i32 0>, <16 x i32> [[X0]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[R]] @@ -364,8 +466,61 @@ define <16 x i32> 
@shuffle_vpermv3_v16i32_demandedbits(<16 x i32> %x0, <16 x i32 ; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <16 x i32> [[M]], <i32 0, i32 32, i32 64, i32 256, i32 512, i32 1024, i32 2048, i32 4096, i32 8192, i32 -32, i32 -64, i32 -128, i32 -256, i32 -512, i32 -1024, i32 -2048> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP24]], 
[[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP6]], <16 x i32> [[T]], <16 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB59:.*]], label %[[BB60:.*]], !prof [[PROF1]] +; CHECK: [[BB59]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB60]]: ; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x 
i32> [[X0]], <16 x i32> [[T]], <16 x i32> [[X1]]) ; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i32> [[R]] @@ -385,8 +540,7 @@ define <8 x i16> @shuffle_vpermv3_v8i16(<8 x i16> %x0, <8 x i16> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[TMP1]], <8 x i16> <i16 8, i16 6, i16 10, i16 4, i16 12, i16 2, i16 14, i16 0>, <8 x i16> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> <i16 8, i16 6, i16 10, i16 4, i16 12, i16 2, i16 14, i16 0>, <8 x i16> [[X1]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[R]] @@ -400,8 +554,7 @@ define <8 x i16> @shuffle_vpermv3_v8i16_unary(<8 x i16> %x0) #0 { ; CHECK-SAME: <8 x i16> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[TMP1]], <8 x i16> <i16 8, i16 6, i16 10, i16 4, i16 12, i16 2, i16 14, i16 0>, <8 x i16> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> <i16 8, i16 6, i16 10, i16 4, i16 12, i16 2, i16 14, i16 0>, <8 x i16> [[X0]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; 
CHECK-NEXT: ret <8 x i16> [[R]] @@ -424,8 +577,37 @@ define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(<8 x i16> %x0, <8 x i16> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <8 x i16> [[M]], <i16 0, i16 16, i16 32, i16 64, i16 256, i16 512, i16 -16, i16 -32> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i16 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i16 [[TMP12]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i16> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i16> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i16 [[TMP18]], 7 +; CHECK-NEXT: [[TMP32:%.*]] = or i16 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i16> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i16> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i16> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i16 [[TMP24]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[_MSPROP1:%.*]] 
= call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[TMP6]], <8 x i16> [[T]], <8 x i16> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP33]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB35:.*]], label %[[BB36:.*]], !prof [[PROF1]] +; CHECK: [[BB35]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB36]]: ; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> [[T]], <8 x i16> [[X1]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[R]] @@ -441,8 +623,7 @@ define <16 x i16> @shuffle_vpermv3_v16i16(<16 x i16> %x0, <16 x i16> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[TMP1]], <16 x i16> <i16 16, i16 14, i16 18, i16 12, i16 20, i16 10, i16 22, i16 8, i16 24, i16 6, i16 26, i16 4, i16 28, i16 2, i16 30, i16 0>, <16 x i16> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> <i16 16, i16 14, i16 18, i16 12, i16 20, i16 10, i16 22, i16 8, i16 24, i16 6, i16 26, i16 4, i16 28, i16 2, i16 30, i16 0>, <16 x i16> [[X1]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[R]] @@ -456,8 +637,7 @@ define <16 x i16> @shuffle_vpermv3_v16i16_unary(<16 x i16> %x0) #0 { ; CHECK-SAME: <16 x i16> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void 
@llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[TMP1]], <16 x i16> <i16 16, i16 14, i16 18, i16 12, i16 20, i16 10, i16 22, i16 8, i16 24, i16 6, i16 26, i16 4, i16 28, i16 2, i16 30, i16 0>, <16 x i16> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> <i16 16, i16 14, i16 18, i16 12, i16 20, i16 10, i16 22, i16 8, i16 24, i16 6, i16 26, i16 4, i16 28, i16 2, i16 30, i16 0>, <16 x i16> [[X0]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[R]] @@ -480,8 +660,61 @@ define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(<16 x i16> %x0, <16 x i16 ; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <16 x i16> [[M]], <i16 0, i16 32, i16 64, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 -32, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i16 [[TMP12]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i16 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP9]], i64 3 +; CHECK-NEXT: 
[[TMP17:%.*]] = and i16 [[TMP16]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i16 [[TMP18]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i16> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i16> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i16> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i16 [[TMP24]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i16 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i16> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i16> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i16 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i16> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i16 [[TMP30]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i16> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i16> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i16> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i16 [[TMP36]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement 
<16 x i16> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i16> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[TMP6]], <16 x i16> [[T]], <16 x i16> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB59:.*]], label %[[BB60:.*]], !prof [[PROF1]] +; CHECK: [[BB59]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB60]]: ; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> [[T]], <16 x i16> [[X1]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[R]] @@ -497,8 +730,7 @@ define <32 x i16> @shuffle_vpermv3_v32i16(<32 x i16> %x0, <32 x i16> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i16> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> <i16 33, i16 17, i16 35, i16 19, i16 37, i16 21, i16 39, i16 23, i16 41, i16 25, i16 43, i16 27, i16 45, i16 29, i16 47, i16 31, i16 49, i16 14, i16 51, i16 12, i16 53, i16 10, i16 55, i16 8, i16 57, i16 6, i16 59, i16 4, i16 61, i16 2, i16 63, i16 0>, <32 x i16> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> <i16 33, 
i16 17, i16 35, i16 19, i16 37, i16 21, i16 39, i16 23, i16 41, i16 25, i16 43, i16 27, i16 45, i16 29, i16 47, i16 31, i16 49, i16 14, i16 51, i16 12, i16 53, i16 10, i16 55, i16 8, i16 57, i16 6, i16 59, i16 4, i16 61, i16 2, i16 63, i16 0>, <32 x i16> [[X1]]) ; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[R]] @@ -512,8 +744,7 @@ define <32 x i16> @shuffle_vpermv3_v32i16_unary(<32 x i16> %x0) #0 { ; CHECK-SAME: <32 x i16> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i16> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> <i16 33, i16 17, i16 35, i16 19, i16 37, i16 21, i16 39, i16 23, i16 41, i16 25, i16 43, i16 27, i16 45, i16 29, i16 47, i16 31, i16 49, i16 14, i16 51, i16 12, i16 53, i16 10, i16 55, i16 8, i16 57, i16 6, i16 59, i16 4, i16 61, i16 2, i16 63, i16 0>, <32 x i16> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> <i16 33, i16 17, i16 35, i16 19, i16 37, i16 21, i16 39, i16 23, i16 41, i16 25, i16 43, i16 27, i16 45, i16 29, i16 47, i16 31, i16 49, i16 14, i16 51, i16 12, i16 53, i16 10, i16 55, i16 8, i16 57, i16 6, i16 59, i16 4, i16 61, i16 2, i16 63, i16 0>, <32 x i16> [[X0]]) ; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[R]] @@ -536,8 +767,109 @@ define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(<32 x i16> %x0, <32 x i16 ; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <32 x i16> [[M]], <i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 
4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096, i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i16> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i16> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31 +; CHECK-NEXT: [[TMP74:%.*]] = or i16 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i16> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i16 [[TMP12]], 31 +; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31 +; CHECK-NEXT: [[TMP77:%.*]] = or i16 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i16> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i16 [[TMP18]], 31 +; CHECK-NEXT: [[TMP80:%.*]] = or i16 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31 +; CHECK-NEXT: [[TMP83:%.*]] = or i16 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i16> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i16 [[TMP24]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31 +; CHECK-NEXT: 
[[TMP86:%.*]] = or i16 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i16> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i16 [[TMP30]], 31 +; CHECK-NEXT: [[TMP89:%.*]] = or i16 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31 +; CHECK-NEXT: [[TMP92:%.*]] = or i16 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <32 x i16> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i16 [[TMP36]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31 +; CHECK-NEXT: [[TMP95:%.*]] = or i16 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i16> [[TMP9]], i64 16 +; CHECK-NEXT: [[TMP43:%.*]] = and i16 [[TMP42]], 31 +; CHECK-NEXT: [[TMP98:%.*]] = or i16 [[TMP42]], [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP9]], i64 17 +; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP9]], i64 18 +; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31 +; CHECK-NEXT: [[TMP101:%.*]] = or i16 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i16> [[TMP9]], i64 19 +; CHECK-NEXT: 
[[TMP49:%.*]] = and i16 [[TMP48]], 31 +; CHECK-NEXT: [[TMP103:%.*]] = or i16 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP9]], i64 20 +; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31 +; CHECK-NEXT: [[TMP105:%.*]] = or i16 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP9]], i64 21 +; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31 +; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <32 x i16> [[TMP9]], i64 22 +; CHECK-NEXT: [[TMP55:%.*]] = and i16 [[TMP54]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP9]], i64 23 +; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31 +; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP9]], i64 24 +; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31 +; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <32 x i16> [[TMP9]], i64 25 +; CHECK-NEXT: [[TMP61:%.*]] = and i16 [[TMP60]], 31 +; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP60]], [[TMP61]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP9]], i64 26 +; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31 +; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP9]], i64 27 +; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31 +; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP64]], [[TMP65]] +; CHECK-NEXT: [[TMP66:%.*]] = extractelement <32 x i16> [[TMP9]], i64 28 +; CHECK-NEXT: [[TMP67:%.*]] = and i16 [[TMP66]], 31 +; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP66]], [[TMP67]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP9]], i64 29 +; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31 +; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP70:%.*]] = 
extractelement <32 x i16> [[TMP9]], i64 30 +; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31 +; CHECK-NEXT: [[TMP102:%.*]] = or i16 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i16> [[TMP9]], i64 31 +; CHECK-NEXT: [[TMP104:%.*]] = and i16 [[TMP72]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP72]], [[TMP104]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP6]], <32 x i16> [[T]], <32 x i16> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP60:%.*]] = icmp ne i16 [[TMP73]], 0 +; CHECK-NEXT: br i1 [[_MSCMP60]], label %[[BB107:.*]], label %[[BB108:.*]], !prof [[PROF1]] +; CHECK: [[BB107]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB108]]: ; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> [[T]], <32 x i16> [[X1]]) ; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[R]] @@ -557,8 +889,7 @@ define <16 x i8> @shuffle_vpermv3_v16i8(<16 x i8> %x0, <16 x i8> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[TMP1]], <16 x i8> <i8 16, i8 14, i8 18, i8 12, i8 20, i8 10, i8 22, i8 8, i8 24, i8 6, i8 26, i8 4, i8 28, i8 2, i8 30, i8 0>, <16 x i8> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> <i8 16, i8 14, i8 18, i8 12, i8 20, i8 10, i8 22, i8 8, i8 24, i8 6, i8 26, i8 4, i8 28, i8 2, i8 30, i8 0>, <16 x i8> [[X1]]) ; CHECK-NEXT: 
store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[R]] @@ -572,8 +903,7 @@ define <16 x i8> @shuffle_vpermv3_v16i8_unary(<16 x i8> %x0) #0 { ; CHECK-SAME: <16 x i8> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[TMP1]], <16 x i8> <i8 16, i8 14, i8 18, i8 12, i8 20, i8 10, i8 22, i8 8, i8 24, i8 6, i8 26, i8 4, i8 28, i8 2, i8 30, i8 0>, <16 x i8> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> <i8 16, i8 14, i8 18, i8 12, i8 20, i8 10, i8 22, i8 8, i8 24, i8 6, i8 26, i8 4, i8 28, i8 2, i8 30, i8 0>, <16 x i8> [[X0]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[R]] @@ -596,8 +926,61 @@ define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(<16 x i8> %x0, <16 x i8> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <16 x i8> [[M]], <i8 0, i8 32, i8 64, i8 -128, i8 0, i8 -32, i8 -64, i8 -128, i8 0, i8 32, i8 64, i8 -128, i8 0, i8 -32, i8 -64, i8 -128> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i8 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i8 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = 
extractelement <16 x i8> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i8 [[TMP14]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i8 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i8 [[TMP16]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i8 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i8 [[TMP18]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i8 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i8 [[TMP20]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i8 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i8 [[TMP22]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i8 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i8 [[TMP24]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i8 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i8 [[TMP26]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i8 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[TMP28]], 15 +; CHECK-NEXT: [[TMP56:%.*]] = or i8 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i8 [[TMP30]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i8 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i8 [[TMP32]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i8 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP34]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i8 [[TMP34]], [[TMP35]] +; CHECK-NEXT: 
[[TMP36:%.*]] = extractelement <16 x i8> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i8 [[TMP36]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i8 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i8 [[TMP38]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i8 [[TMP40]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i8 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[TMP6]], <16 x i8> [[T]], <16 x i8> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP57]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB59:.*]], label %[[BB60:.*]], !prof [[PROF1]] +; CHECK: [[BB59]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB60]]: ; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> [[T]], <16 x i8> [[X1]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[R]] @@ -613,8 +996,7 @@ define <32 x i8> @shuffle_vpermv3_v32i8(<32 x i8> %x0, <32 x i8> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i8> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[TMP1]], <32 x i8> <i8 33, i8 17, i8 35, i8 19, i8 37, i8 21, i8 39, i8 23, i8 41, i8 25, i8 43, i8 27, i8 45, i8 29, i8 47, i8 31, i8 49, i8 14, i8 51, i8 12, i8 53, i8 10, i8 55, i8 8, i8 57, i8 6, i8 59, i8 4, i8 61, i8 
2, i8 63, i8 0>, <32 x i8> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> <i8 33, i8 17, i8 35, i8 19, i8 37, i8 21, i8 39, i8 23, i8 41, i8 25, i8 43, i8 27, i8 45, i8 29, i8 47, i8 31, i8 49, i8 14, i8 51, i8 12, i8 53, i8 10, i8 55, i8 8, i8 57, i8 6, i8 59, i8 4, i8 61, i8 2, i8 63, i8 0>, <32 x i8> [[X1]]) ; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[R]] @@ -628,8 +1010,7 @@ define <32 x i8> @shuffle_vpermv3_v32i8_unary(<32 x i8> %x0) #0 { ; CHECK-SAME: <32 x i8> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i8> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[TMP1]], <32 x i8> <i8 33, i8 17, i8 35, i8 19, i8 37, i8 21, i8 39, i8 23, i8 41, i8 25, i8 43, i8 27, i8 45, i8 29, i8 47, i8 31, i8 49, i8 14, i8 51, i8 12, i8 53, i8 10, i8 55, i8 8, i8 57, i8 6, i8 59, i8 4, i8 61, i8 2, i8 63, i8 0>, <32 x i8> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> <i8 33, i8 17, i8 35, i8 19, i8 37, i8 21, i8 39, i8 23, i8 41, i8 25, i8 43, i8 27, i8 45, i8 29, i8 47, i8 31, i8 49, i8 14, i8 51, i8 12, i8 53, i8 10, i8 55, i8 8, i8 57, i8 6, i8 59, i8 4, i8 61, i8 2, i8 63, i8 0>, <32 x i8> [[X0]]) ; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[R]] @@ -652,8 +1033,109 @@ define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(<32 x i8> %x0, <32 x i8> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <32 x i8> [[M]], <i8 0, i8 0, i8 64, i8 -128, i8 0, 
i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <32 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i8> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 31 +; CHECK-NEXT: [[TMP74:%.*]] = or i8 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i8> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], 31 +; CHECK-NEXT: [[TMP76:%.*]] = or i8 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i8> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i8 [[TMP14]], 31 +; CHECK-NEXT: [[TMP77:%.*]] = or i8 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i8> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i8 [[TMP16]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = or i8 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i8> [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i8 [[TMP18]], 31 +; CHECK-NEXT: [[TMP80:%.*]] = or i8 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i8> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i8 [[TMP20]], 31 +; CHECK-NEXT: [[TMP82:%.*]] = or i8 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i8> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i8 [[TMP22]], 31 +; CHECK-NEXT: [[TMP83:%.*]] = or i8 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i8> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i8 [[TMP24]], 31 +; CHECK-NEXT: [[TMP85:%.*]] = or i8 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i8> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i8 [[TMP26]], 31 +; CHECK-NEXT: [[TMP86:%.*]] = or i8 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = 
extractelement <32 x i8> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[TMP28]], 31 +; CHECK-NEXT: [[TMP88:%.*]] = or i8 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i8> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i8 [[TMP30]], 31 +; CHECK-NEXT: [[TMP89:%.*]] = or i8 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i8> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i8 [[TMP32]], 31 +; CHECK-NEXT: [[TMP91:%.*]] = or i8 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i8> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP34]], 31 +; CHECK-NEXT: [[TMP92:%.*]] = or i8 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <32 x i8> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i8 [[TMP36]], 31 +; CHECK-NEXT: [[TMP94:%.*]] = or i8 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i8> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i8 [[TMP38]], 31 +; CHECK-NEXT: [[TMP95:%.*]] = or i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i8> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i8 [[TMP40]], 31 +; CHECK-NEXT: [[TMP97:%.*]] = or i8 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i8> [[TMP9]], i64 16 +; CHECK-NEXT: [[TMP43:%.*]] = and i8 [[TMP42]], 31 +; CHECK-NEXT: [[TMP98:%.*]] = or i8 [[TMP42]], [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i8> [[TMP9]], i64 17 +; CHECK-NEXT: [[TMP45:%.*]] = and i8 [[TMP44]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = or i8 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i8> [[TMP9]], i64 18 +; CHECK-NEXT: [[TMP47:%.*]] = and i8 [[TMP46]], 31 +; CHECK-NEXT: [[TMP101:%.*]] = or i8 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i8> [[TMP9]], i64 19 +; CHECK-NEXT: [[TMP49:%.*]] = and i8 [[TMP48]], 31 +; CHECK-NEXT: [[TMP103:%.*]] = or i8 [[TMP48]], [[TMP49]] +; CHECK-NEXT: 
[[TMP50:%.*]] = extractelement <32 x i8> [[TMP9]], i64 20 +; CHECK-NEXT: [[TMP51:%.*]] = and i8 [[TMP50]], 31 +; CHECK-NEXT: [[TMP105:%.*]] = or i8 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i8> [[TMP9]], i64 21 +; CHECK-NEXT: [[TMP53:%.*]] = and i8 [[TMP52]], 31 +; CHECK-NEXT: [[TMP75:%.*]] = or i8 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <32 x i8> [[TMP9]], i64 22 +; CHECK-NEXT: [[TMP55:%.*]] = and i8 [[TMP54]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = or i8 [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i8> [[TMP9]], i64 23 +; CHECK-NEXT: [[TMP57:%.*]] = and i8 [[TMP56]], 31 +; CHECK-NEXT: [[TMP81:%.*]] = or i8 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i8> [[TMP9]], i64 24 +; CHECK-NEXT: [[TMP59:%.*]] = and i8 [[TMP58]], 31 +; CHECK-NEXT: [[TMP84:%.*]] = or i8 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <32 x i8> [[TMP9]], i64 25 +; CHECK-NEXT: [[TMP61:%.*]] = and i8 [[TMP60]], 31 +; CHECK-NEXT: [[TMP87:%.*]] = or i8 [[TMP60]], [[TMP61]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i8> [[TMP9]], i64 26 +; CHECK-NEXT: [[TMP63:%.*]] = and i8 [[TMP62]], 31 +; CHECK-NEXT: [[TMP90:%.*]] = or i8 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i8> [[TMP9]], i64 27 +; CHECK-NEXT: [[TMP65:%.*]] = and i8 [[TMP64]], 31 +; CHECK-NEXT: [[TMP93:%.*]] = or i8 [[TMP64]], [[TMP65]] +; CHECK-NEXT: [[TMP66:%.*]] = extractelement <32 x i8> [[TMP9]], i64 28 +; CHECK-NEXT: [[TMP67:%.*]] = and i8 [[TMP66]], 31 +; CHECK-NEXT: [[TMP96:%.*]] = or i8 [[TMP66]], [[TMP67]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i8> [[TMP9]], i64 29 +; CHECK-NEXT: [[TMP69:%.*]] = and i8 [[TMP68]], 31 +; CHECK-NEXT: [[TMP99:%.*]] = or i8 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i8> [[TMP9]], i64 30 +; CHECK-NEXT: [[TMP71:%.*]] = and i8 [[TMP70]], 31 +; CHECK-NEXT: [[TMP102:%.*]] = or i8 [[TMP70]], [[TMP71]] 
+; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i8> [[TMP9]], i64 31 +; CHECK-NEXT: [[TMP104:%.*]] = and i8 [[TMP72]], 31 +; CHECK-NEXT: [[TMP73:%.*]] = or i8 [[TMP72]], [[TMP104]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[TMP6]], <32 x i8> [[T]], <32 x i8> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP60:%.*]] = icmp ne i8 [[TMP73]], 0 +; CHECK-NEXT: br i1 [[_MSCMP60]], label %[[BB107:.*]], label %[[BB108:.*]], !prof [[PROF1]] +; CHECK: [[BB107]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB108]]: ; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> [[T]], <32 x i8> [[X1]]) ; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[R]] @@ -669,8 +1151,7 @@ define <64 x i8> @shuffle_vpermv3_v64i8(<64 x i8> %x0, <64 x i8> %x1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <64 x i8> [[_MSPROP]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[TMP1]], <64 x i8> <i8 -128, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121, i8 120, i8 119, i8 118, i8 115, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> [[TMP2]]) ; CHECK-NEXT: [[R:%.*]] = call <64 x i8> 
@llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> <i8 -128, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121, i8 120, i8 119, i8 118, i8 115, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> [[X1]]) ; CHECK-NEXT: store <64 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[R]] @@ -684,8 +1165,7 @@ define <64 x i8> @shuffle_vpermv3_v64i8_unary(<64 x i8> %x0) #0 { ; CHECK-SAME: <64 x i8> [[X0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <64 x i8> [[_MSPROP]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[TMP1]], <64 x i8> <i8 -128, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121, i8 120, i8 119, i8 118, i8 115, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> [[TMP1]]) ; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> <i8 -128, i8 127, i8 126, i8 125, i8 124, i8 123, i8 122, i8 121, i8 120, i8 119, i8 118, i8 115, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 16, i8 17, i8 18, i8 19, i8 
20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> [[X0]]) ; CHECK-NEXT: store <64 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[R]] @@ -708,8 +1188,205 @@ define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(<64 x i8> %x0, <64 x i8> %x ; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[T:%.*]] = or <64 x i8> [[M]], <i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[_MSPROP1:%.*]] = or <64 x i8> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <64 x i8> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 63 +; CHECK-NEXT: [[TMP139:%.*]] = or i8 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <64 x i8> [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], 63 +; CHECK-NEXT: [[TMP140:%.*]] = or i8 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <64 x i8> [[TMP9]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i8 [[TMP14]], 63 +; CHECK-NEXT: [[TMP142:%.*]] = or i8 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <64 x i8> [[TMP9]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = and i8 [[TMP16]], 63 +; CHECK-NEXT: [[TMP143:%.*]] = or i8 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <64 x i8> [[TMP9]], 
i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = and i8 [[TMP18]], 63 +; CHECK-NEXT: [[TMP145:%.*]] = or i8 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <64 x i8> [[TMP9]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i8 [[TMP20]], 63 +; CHECK-NEXT: [[TMP146:%.*]] = or i8 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <64 x i8> [[TMP9]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i8 [[TMP22]], 63 +; CHECK-NEXT: [[TMP148:%.*]] = or i8 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <64 x i8> [[TMP9]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i8 [[TMP24]], 63 +; CHECK-NEXT: [[TMP149:%.*]] = or i8 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <64 x i8> [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = and i8 [[TMP26]], 63 +; CHECK-NEXT: [[TMP151:%.*]] = or i8 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <64 x i8> [[TMP9]], i64 9 +; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[TMP28]], 63 +; CHECK-NEXT: [[TMP152:%.*]] = or i8 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <64 x i8> [[TMP9]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = and i8 [[TMP30]], 63 +; CHECK-NEXT: [[TMP154:%.*]] = or i8 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <64 x i8> [[TMP9]], i64 11 +; CHECK-NEXT: [[TMP33:%.*]] = and i8 [[TMP32]], 63 +; CHECK-NEXT: [[TMP155:%.*]] = or i8 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <64 x i8> [[TMP9]], i64 12 +; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP34]], 63 +; CHECK-NEXT: [[TMP157:%.*]] = or i8 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <64 x i8> [[TMP9]], i64 13 +; CHECK-NEXT: [[TMP37:%.*]] = and i8 [[TMP36]], 63 +; CHECK-NEXT: [[TMP158:%.*]] = or i8 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <64 x i8> [[TMP9]], i64 14 +; CHECK-NEXT: [[TMP39:%.*]] = and i8 [[TMP38]], 63 +; CHECK-NEXT: [[TMP160:%.*]] = or i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement 
<64 x i8> [[TMP9]], i64 15 +; CHECK-NEXT: [[TMP41:%.*]] = and i8 [[TMP40]], 63 +; CHECK-NEXT: [[TMP161:%.*]] = or i8 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <64 x i8> [[TMP9]], i64 16 +; CHECK-NEXT: [[TMP43:%.*]] = and i8 [[TMP42]], 63 +; CHECK-NEXT: [[TMP163:%.*]] = or i8 [[TMP42]], [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <64 x i8> [[TMP9]], i64 17 +; CHECK-NEXT: [[TMP45:%.*]] = and i8 [[TMP44]], 63 +; CHECK-NEXT: [[TMP164:%.*]] = or i8 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <64 x i8> [[TMP9]], i64 18 +; CHECK-NEXT: [[TMP47:%.*]] = and i8 [[TMP46]], 63 +; CHECK-NEXT: [[TMP166:%.*]] = or i8 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <64 x i8> [[TMP9]], i64 19 +; CHECK-NEXT: [[TMP49:%.*]] = and i8 [[TMP48]], 63 +; CHECK-NEXT: [[TMP167:%.*]] = or i8 [[TMP48]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <64 x i8> [[TMP9]], i64 20 +; CHECK-NEXT: [[TMP51:%.*]] = and i8 [[TMP50]], 63 +; CHECK-NEXT: [[TMP169:%.*]] = or i8 [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <64 x i8> [[TMP9]], i64 21 +; CHECK-NEXT: [[TMP53:%.*]] = and i8 [[TMP52]], 63 +; CHECK-NEXT: [[TMP170:%.*]] = or i8 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <64 x i8> [[TMP9]], i64 22 +; CHECK-NEXT: [[TMP55:%.*]] = and i8 [[TMP54]], 63 +; CHECK-NEXT: [[TMP172:%.*]] = or i8 [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <64 x i8> [[TMP9]], i64 23 +; CHECK-NEXT: [[TMP57:%.*]] = and i8 [[TMP56]], 63 +; CHECK-NEXT: [[TMP173:%.*]] = or i8 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <64 x i8> [[TMP9]], i64 24 +; CHECK-NEXT: [[TMP59:%.*]] = and i8 [[TMP58]], 63 +; CHECK-NEXT: [[TMP175:%.*]] = or i8 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <64 x i8> [[TMP9]], i64 25 +; CHECK-NEXT: [[TMP61:%.*]] = and i8 [[TMP60]], 63 +; CHECK-NEXT: [[TMP176:%.*]] = or i8 [[TMP60]], [[TMP61]] +; CHECK-NEXT: 
[[TMP62:%.*]] = extractelement <64 x i8> [[TMP9]], i64 26 +; CHECK-NEXT: [[TMP63:%.*]] = and i8 [[TMP62]], 63 +; CHECK-NEXT: [[TMP178:%.*]] = or i8 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <64 x i8> [[TMP9]], i64 27 +; CHECK-NEXT: [[TMP65:%.*]] = and i8 [[TMP64]], 63 +; CHECK-NEXT: [[TMP179:%.*]] = or i8 [[TMP64]], [[TMP65]] +; CHECK-NEXT: [[TMP66:%.*]] = extractelement <64 x i8> [[TMP9]], i64 28 +; CHECK-NEXT: [[TMP67:%.*]] = and i8 [[TMP66]], 63 +; CHECK-NEXT: [[TMP181:%.*]] = or i8 [[TMP66]], [[TMP67]] +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <64 x i8> [[TMP9]], i64 29 +; CHECK-NEXT: [[TMP69:%.*]] = and i8 [[TMP68]], 63 +; CHECK-NEXT: [[TMP182:%.*]] = or i8 [[TMP68]], [[TMP69]] +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <64 x i8> [[TMP9]], i64 30 +; CHECK-NEXT: [[TMP71:%.*]] = and i8 [[TMP70]], 63 +; CHECK-NEXT: [[TMP184:%.*]] = or i8 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <64 x i8> [[TMP9]], i64 31 +; CHECK-NEXT: [[TMP73:%.*]] = and i8 [[TMP72]], 63 +; CHECK-NEXT: [[TMP185:%.*]] = or i8 [[TMP72]], [[TMP73]] +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <64 x i8> [[TMP9]], i64 32 +; CHECK-NEXT: [[TMP75:%.*]] = and i8 [[TMP74]], 63 +; CHECK-NEXT: [[TMP187:%.*]] = or i8 [[TMP74]], [[TMP75]] +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <64 x i8> [[TMP9]], i64 33 +; CHECK-NEXT: [[TMP77:%.*]] = and i8 [[TMP76]], 63 +; CHECK-NEXT: [[TMP188:%.*]] = or i8 [[TMP76]], [[TMP77]] +; CHECK-NEXT: [[TMP78:%.*]] = extractelement <64 x i8> [[TMP9]], i64 34 +; CHECK-NEXT: [[TMP79:%.*]] = and i8 [[TMP78]], 63 +; CHECK-NEXT: [[TMP190:%.*]] = or i8 [[TMP78]], [[TMP79]] +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <64 x i8> [[TMP9]], i64 35 +; CHECK-NEXT: [[TMP81:%.*]] = and i8 [[TMP80]], 63 +; CHECK-NEXT: [[TMP191:%.*]] = or i8 [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <64 x i8> [[TMP9]], i64 36 +; CHECK-NEXT: [[TMP83:%.*]] = and i8 [[TMP82]], 63 +; CHECK-NEXT: [[TMP193:%.*]] = or i8 [[TMP82]], 
[[TMP83]] +; CHECK-NEXT: [[TMP84:%.*]] = extractelement <64 x i8> [[TMP9]], i64 37 +; CHECK-NEXT: [[TMP85:%.*]] = and i8 [[TMP84]], 63 +; CHECK-NEXT: [[TMP194:%.*]] = or i8 [[TMP84]], [[TMP85]] +; CHECK-NEXT: [[TMP86:%.*]] = extractelement <64 x i8> [[TMP9]], i64 38 +; CHECK-NEXT: [[TMP87:%.*]] = and i8 [[TMP86]], 63 +; CHECK-NEXT: [[TMP196:%.*]] = or i8 [[TMP86]], [[TMP87]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <64 x i8> [[TMP9]], i64 39 +; CHECK-NEXT: [[TMP89:%.*]] = and i8 [[TMP88]], 63 +; CHECK-NEXT: [[TMP197:%.*]] = or i8 [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <64 x i8> [[TMP9]], i64 40 +; CHECK-NEXT: [[TMP91:%.*]] = and i8 [[TMP90]], 63 +; CHECK-NEXT: [[TMP199:%.*]] = or i8 [[TMP90]], [[TMP91]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <64 x i8> [[TMP9]], i64 41 +; CHECK-NEXT: [[TMP93:%.*]] = and i8 [[TMP92]], 63 +; CHECK-NEXT: [[TMP201:%.*]] = or i8 [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <64 x i8> [[TMP9]], i64 42 +; CHECK-NEXT: [[TMP95:%.*]] = and i8 [[TMP94]], 63 +; CHECK-NEXT: [[TMP138:%.*]] = or i8 [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP96:%.*]] = extractelement <64 x i8> [[TMP9]], i64 43 +; CHECK-NEXT: [[TMP97:%.*]] = and i8 [[TMP96]], 63 +; CHECK-NEXT: [[TMP141:%.*]] = or i8 [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <64 x i8> [[TMP9]], i64 44 +; CHECK-NEXT: [[TMP99:%.*]] = and i8 [[TMP98]], 63 +; CHECK-NEXT: [[TMP144:%.*]] = or i8 [[TMP98]], [[TMP99]] +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <64 x i8> [[TMP9]], i64 45 +; CHECK-NEXT: [[TMP101:%.*]] = and i8 [[TMP100]], 63 +; CHECK-NEXT: [[TMP147:%.*]] = or i8 [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP102:%.*]] = extractelement <64 x i8> [[TMP9]], i64 46 +; CHECK-NEXT: [[TMP103:%.*]] = and i8 [[TMP102]], 63 +; CHECK-NEXT: [[TMP150:%.*]] = or i8 [[TMP102]], [[TMP103]] +; CHECK-NEXT: [[TMP104:%.*]] = extractelement <64 x i8> [[TMP9]], i64 47 +; CHECK-NEXT: [[TMP105:%.*]] = and i8 [[TMP104]], 63 +; 
CHECK-NEXT: [[TMP153:%.*]] = or i8 [[TMP104]], [[TMP105]] +; CHECK-NEXT: [[TMP106:%.*]] = extractelement <64 x i8> [[TMP9]], i64 48 +; CHECK-NEXT: [[TMP107:%.*]] = and i8 [[TMP106]], 63 +; CHECK-NEXT: [[TMP156:%.*]] = or i8 [[TMP106]], [[TMP107]] +; CHECK-NEXT: [[TMP108:%.*]] = extractelement <64 x i8> [[TMP9]], i64 49 +; CHECK-NEXT: [[TMP109:%.*]] = and i8 [[TMP108]], 63 +; CHECK-NEXT: [[TMP159:%.*]] = or i8 [[TMP108]], [[TMP109]] +; CHECK-NEXT: [[TMP110:%.*]] = extractelement <64 x i8> [[TMP9]], i64 50 +; CHECK-NEXT: [[TMP111:%.*]] = and i8 [[TMP110]], 63 +; CHECK-NEXT: [[TMP162:%.*]] = or i8 [[TMP110]], [[TMP111]] +; CHECK-NEXT: [[TMP112:%.*]] = extractelement <64 x i8> [[TMP9]], i64 51 +; CHECK-NEXT: [[TMP113:%.*]] = and i8 [[TMP112]], 63 +; CHECK-NEXT: [[TMP165:%.*]] = or i8 [[TMP112]], [[TMP113]] +; CHECK-NEXT: [[TMP114:%.*]] = extractelement <64 x i8> [[TMP9]], i64 52 +; CHECK-NEXT: [[TMP115:%.*]] = and i8 [[TMP114]], 63 +; CHECK-NEXT: [[TMP168:%.*]] = or i8 [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP116:%.*]] = extractelement <64 x i8> [[TMP9]], i64 53 +; CHECK-NEXT: [[TMP117:%.*]] = and i8 [[TMP116]], 63 +; CHECK-NEXT: [[TMP171:%.*]] = or i8 [[TMP116]], [[TMP117]] +; CHECK-NEXT: [[TMP118:%.*]] = extractelement <64 x i8> [[TMP9]], i64 54 +; CHECK-NEXT: [[TMP119:%.*]] = and i8 [[TMP118]], 63 +; CHECK-NEXT: [[TMP174:%.*]] = or i8 [[TMP118]], [[TMP119]] +; CHECK-NEXT: [[TMP120:%.*]] = extractelement <64 x i8> [[TMP9]], i64 55 +; CHECK-NEXT: [[TMP121:%.*]] = and i8 [[TMP120]], 63 +; CHECK-NEXT: [[TMP177:%.*]] = or i8 [[TMP120]], [[TMP121]] +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <64 x i8> [[TMP9]], i64 56 +; CHECK-NEXT: [[TMP123:%.*]] = and i8 [[TMP122]], 63 +; CHECK-NEXT: [[TMP180:%.*]] = or i8 [[TMP122]], [[TMP123]] +; CHECK-NEXT: [[TMP124:%.*]] = extractelement <64 x i8> [[TMP9]], i64 57 +; CHECK-NEXT: [[TMP125:%.*]] = and i8 [[TMP124]], 63 +; CHECK-NEXT: [[TMP183:%.*]] = or i8 [[TMP124]], [[TMP125]] +; CHECK-NEXT: [[TMP126:%.*]] = extractelement <64 
x i8> [[TMP9]], i64 58 +; CHECK-NEXT: [[TMP127:%.*]] = and i8 [[TMP126]], 63 +; CHECK-NEXT: [[TMP186:%.*]] = or i8 [[TMP126]], [[TMP127]] +; CHECK-NEXT: [[TMP128:%.*]] = extractelement <64 x i8> [[TMP9]], i64 59 +; CHECK-NEXT: [[TMP129:%.*]] = and i8 [[TMP128]], 63 +; CHECK-NEXT: [[TMP189:%.*]] = or i8 [[TMP128]], [[TMP129]] +; CHECK-NEXT: [[TMP130:%.*]] = extractelement <64 x i8> [[TMP9]], i64 60 +; CHECK-NEXT: [[TMP131:%.*]] = and i8 [[TMP130]], 63 +; CHECK-NEXT: [[TMP192:%.*]] = or i8 [[TMP130]], [[TMP131]] +; CHECK-NEXT: [[TMP132:%.*]] = extractelement <64 x i8> [[TMP9]], i64 61 +; CHECK-NEXT: [[TMP133:%.*]] = and i8 [[TMP132]], 63 +; CHECK-NEXT: [[TMP195:%.*]] = or i8 [[TMP132]], [[TMP133]] +; CHECK-NEXT: [[TMP134:%.*]] = extractelement <64 x i8> [[TMP9]], i64 62 +; CHECK-NEXT: [[TMP135:%.*]] = and i8 [[TMP134]], 63 +; CHECK-NEXT: [[TMP198:%.*]] = or i8 [[TMP134]], [[TMP135]] +; CHECK-NEXT: [[TMP136:%.*]] = extractelement <64 x i8> [[TMP9]], i64 63 +; CHECK-NEXT: [[TMP200:%.*]] = and i8 [[TMP136]], 63 +; CHECK-NEXT: [[TMP137:%.*]] = or i8 [[TMP136]], [[TMP200]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[TMP6]], <64 x i8> [[T]], <64 x i8> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP124:%.*]] = icmp ne i8 [[TMP137]], 0 +; CHECK-NEXT: br i1 [[_MSCMP124]], label %[[BB203:.*]], label %[[BB204:.*]], !prof [[PROF1]] +; CHECK: [[BB203]]: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: [[BB204]]: ; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> [[T]], <64 x i8> [[X1]]) ; CHECK-NEXT: store <64 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[R]] @@ -720,3 +1397,6 @@ define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(<64 x i8> %x0, <64 x i8> %x } attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. |