; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -mattr=+avx512f -passes=msan -S | FileCheck %s
; Forked from llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;
; vXi64
;

define <2 x i64> @shuffle_vpermv3_v2i64(<2 x i64> %x0, <2 x i64> %x1) #0 {
; CHECK-LABEL: define <2 x i64> @shuffle_vpermv3_v2i64(
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP1]], <2 x i64> , <2 x i64> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> , <2 x i64> [[X1]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[R]]
;
  %r = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> , <2 x i64> %x1)
  ret <2 x i64> %r
}

define <2 x i64> @shuffle_vpermv3_v2i64_unary(<2 x i64> %x0) #0 {
; CHECK-LABEL: define <2 x i64> @shuffle_vpermv3_v2i64_unary(
; CHECK-SAME: <2 x i64> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP1]], <2 x i64> , <2 x i64> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> , <2 x i64> [[X0]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[R]]
;
  %r = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> , <2 x i64> %x0)
  ret <2 x i64> %r
}

define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %m) #0 {
; CHECK-LABEL: define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> [[M]], splat (i64 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <2 x i64> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[T]], <2 x i64> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i1> [[TMP10]] to i2
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[R]]
;
  %t = or <2 x i64> %m,
  %r = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %t, <2 x i64> %x1)
  ret <2 x i64> %r
}

define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %m) #0 {
; CHECK-LABEL: define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits_negative(
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> [[M]], splat (i64 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <2 x i64> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[T]], <2 x i64> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i1> [[TMP10]] to i2
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[R]]
;
  %t = or <2 x i64> %m,
  %r = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %t, <2 x i64> %x1)
  ret <2 x i64> %r
}

define <4 x i64> @shuffle_vpermv3_v4i64(<4 x i64> %x0, <4 x i64> %x1) #0 {
; CHECK-LABEL: define <4 x i64> @shuffle_vpermv3_v4i64(
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP1]], <4 x i64> , <4 x i64> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> , <4 x i64> [[X1]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i64> [[R]]
;
  %r = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> , <4 x i64> %x1)
  ret <4 x i64> %r
}

define <4 x i64> @shuffle_vpermv3_v4i64_unary(<4 x i64> %x0) #0 {
; CHECK-LABEL: define <4 x i64> @shuffle_vpermv3_v4i64_unary(
; CHECK-SAME: <4 x i64> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP1]], <4 x i64> , <4 x i64> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> , <4 x i64> [[X0]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i64> [[R]]
;
  %r = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> , <4 x i64> %x0)
  ret <4 x i64> %r
}

define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %m) #0 {
; CHECK-LABEL: define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i64> [[M]], splat (i64 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i64> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <4 x i64> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i2>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP6]], <4 x i64> [[T]], <4 x i64> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i2> [[TMP10]] to i8
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[T]], <4 x i64> [[X1]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i64> [[R]]
;
  %t = or <4 x i64> %m,
  %r = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %t, <4 x i64> %x1)
  ret <4 x i64> %r
}

define <8 x i64> @shuffle_vpermv3_v8i64(<8 x i64> %x0, <8 x i64> %x1) #0 {
; CHECK-LABEL: define <8 x i64> @shuffle_vpermv3_v8i64(
; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> , <8 x i64> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> , <8 x i64> [[X1]])
; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i64> [[R]]
;
  %r = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1)
  ret <8 x i64> %r
}

define <8 x i64> @shuffle_vpermv3_v8i64_unary(<8 x i64> %x0) #0 {
; CHECK-LABEL: define <8 x i64> @shuffle_vpermv3_v8i64_unary(
; CHECK-SAME: <8 x i64> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> , <8 x i64> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> , <8 x i64> [[X0]])
; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i64> [[R]]
;
  %r = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x0)
  ret <8 x i64> %r
}

define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %m) #0 {
; CHECK-LABEL: define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(
; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <8 x i64> [[M]], splat (i64 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <8 x i64> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i64> [[TMP9]] to <8 x i3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP6]], <8 x i64> [[T]], <8 x i64> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i3> [[TMP10]] to i24
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> [[T]], <8 x i64> [[X1]])
; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i64> [[R]]
;
  %t = or <8 x i64> %m,
  %r = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %t, <8 x i64> %x1)
  ret <8 x i64> %r
}

;
; vXi32
;

define <4 x i32> @shuffle_vpermv3_v4i32(<4 x i32> %x0, <4 x i32> %x1) #0 {
; CHECK-LABEL: define <4 x i32> @shuffle_vpermv3_v4i32(
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP1]], <4 x i32> , <4 x i32> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> , <4 x i32> [[X1]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %r = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> , <4 x i32> %x1)
  ret <4 x i32> %r
}

define <4 x i32> @shuffle_vpermv3_v4i32_unary(<4 x i32> %x0) #0 {
; CHECK-LABEL: define <4 x i32> @shuffle_vpermv3_v4i32_unary(
; CHECK-SAME: <4 x i32> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP1]], <4 x i32> , <4 x i32> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> , <4 x i32> [[X0]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %r = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> , <4 x i32> %x0)
  ret <4 x i32> %r
}

define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %m) #0 {
; CHECK-LABEL: define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[M]], splat (i32 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <4 x i32> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i32> [[TMP9]] to <4 x i2>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP6]], <4 x i32> [[T]], <4 x i32> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i2> [[TMP10]] to i8
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[T]], <4 x i32> [[X1]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t = or <4 x i32> %m,
  %r = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %t, <4 x i32> %x1)
  ret <4 x i32> %r
}

define <8 x i32> @shuffle_vpermv3_v8i32(<8 x i32> %x0, <8 x i32> %x1) #0 {
; CHECK-LABEL: define <8 x i32> @shuffle_vpermv3_v8i32(
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP1]], <8 x i32> , <8 x i32> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> , <8 x i32> [[X1]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[R]]
;
  %r = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> , <8 x i32> %x1)
  ret <8 x i32> %r
}

define <8 x i32> @shuffle_vpermv3_v8i32_unary(<8 x i32> %x0) #0 {
; CHECK-LABEL: define <8 x i32> @shuffle_vpermv3_v8i32_unary(
; CHECK-SAME: <8 x i32> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP1]], <8 x i32> , <8 x i32> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> , <8 x i32> [[X0]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[R]]
;
  %r = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> , <8 x i32> %x0)
  ret <8 x i32> %r
}

define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %m) #0 {
; CHECK-LABEL: define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <8 x i32> [[M]], splat (i32 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i32> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <8 x i32> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i32> [[TMP9]] to <8 x i3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP6]], <8 x i32> [[T]], <8 x i32> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i3> [[TMP10]] to i24
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[T]], <8 x i32> [[X1]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[R]]
;
  %t = or <8 x i32> %m,
  %r = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %t, <8 x i32> %x1)
  ret <8 x i32> %r
}

define <16 x i32> @shuffle_vpermv3_v16i32(<16 x i32> %x0, <16 x i32> %x1) #0 {
; CHECK-LABEL: define <16 x i32> @shuffle_vpermv3_v16i32(
; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> , <16 x i32> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> , <16 x i32> [[X1]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[R]]
;
  %r = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1)
  ret <16 x i32> %r
}

define <16 x i32> @shuffle_vpermv3_v16i32_unary(<16 x i32> %x0) #0 {
; CHECK-LABEL: define <16 x i32> @shuffle_vpermv3_v16i32_unary(
; CHECK-SAME: <16 x i32> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> , <16 x i32> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> , <16 x i32> [[X0]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[R]]
;
  %r = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x0)
  ret <16 x i32> %r
}

define <16 x i32> @shuffle_vpermv3_v16i32_demandedbits(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %m) #0 {
; CHECK-LABEL: define <16 x i32> @shuffle_vpermv3_v16i32_demandedbits(
; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[M]], splat (i32 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i32> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <16 x i32> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP9]] to <16 x i4>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP6]], <16 x i32> [[T]], <16 x i32> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i4> [[TMP10]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> [[T]], <16 x i32> [[X1]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[R]]
;
  %t = or <16 x i32> %m,
  %r = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %t, <16 x i32> %x1)
  ret <16 x i32> %r
}

;
; vXi16
;

define <8 x i16> @shuffle_vpermv3_v8i16(<8 x i16> %x0, <8 x i16> %x1) #0 {
; CHECK-LABEL: define <8 x i16> @shuffle_vpermv3_v8i16(
; CHECK-SAME: <8 x i16> [[X0:%.*]], <8 x i16> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[TMP1]], <8 x i16> , <8 x i16> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> , <8 x i16> [[X1]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[R]]
;
  %r = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> , <8 x i16> %x1)
  ret <8 x i16> %r
}

define <8 x i16> @shuffle_vpermv3_v8i16_unary(<8 x i16> %x0) #0 {
; CHECK-LABEL: define <8 x i16> @shuffle_vpermv3_v8i16_unary(
; CHECK-SAME: <8 x i16> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[TMP1]], <8 x i16> , <8 x i16> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> , <8 x i16> [[X0]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[R]]
;
  %r = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> , <8 x i16> %x0)
  ret <8 x i16> %r
}

define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %m) #0 {
; CHECK-LABEL: define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(
; CHECK-SAME: <8 x i16> [[X0:%.*]], <8 x i16> [[X1:%.*]], <8 x i16> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <8 x i16> [[M]], splat (i16 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i16> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i16> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <8 x i16> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i16> [[TMP9]] to <8 x i3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[TMP6]], <8 x i16> [[T]], <8 x i16> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i3> [[TMP10]] to i24
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> [[T]], <8 x i16> [[X1]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[R]]
;
  %t = or <8 x i16> %m,
  %r = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %t, <8 x i16> %x1)
  ret <8 x i16> %r
}

define <16 x i16> @shuffle_vpermv3_v16i16(<16 x i16> %x0, <16 x i16> %x1) #0 {
; CHECK-LABEL: define <16 x i16> @shuffle_vpermv3_v16i16(
; CHECK-SAME: <16 x i16> [[X0:%.*]], <16 x i16> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[TMP1]], <16 x i16> , <16 x i16> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> , <16 x i16> [[X1]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[R]]
;
  %r = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> , <16 x i16> %x1)
  ret <16 x i16> %r
}

define <16 x i16> @shuffle_vpermv3_v16i16_unary(<16 x i16> %x0) #0 {
; CHECK-LABEL: define <16 x i16> @shuffle_vpermv3_v16i16_unary(
; CHECK-SAME: <16 x i16> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[TMP1]], <16 x i16> , <16 x i16> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> , <16 x i16> [[X0]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[R]]
;
  %r = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> , <16 x i16> %x0)
  ret <16 x i16> %r
}

define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %m) #0 {
; CHECK-LABEL: define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(
; CHECK-SAME: <16 x i16> [[X0:%.*]], <16 x i16> [[X1:%.*]], <16 x i16> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <16 x i16> [[M]], splat (i16 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i16> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i16> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <16 x i16> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i16> [[TMP9]] to <16 x i4>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[TMP6]], <16 x i16> [[T]], <16 x i16> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i4> [[TMP10]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> [[T]], <16 x i16> [[X1]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[R]]
;
  %t = or <16 x i16> %m,
  %r = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %t, <16 x i16> %x1)
  ret <16 x i16> %r
}

define <32 x i16> @shuffle_vpermv3_v32i16(<32 x i16> %x0, <32 x i16> %x1) #0 {
; CHECK-LABEL: define <32 x i16> @shuffle_vpermv3_v32i16(
; CHECK-SAME: <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> , <32 x i16> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> , <32 x i16> [[X1]])
; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[R]]
;
  %r = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> , <32 x i16> %x1)
  ret <32 x i16> %r
}

define <32 x i16> @shuffle_vpermv3_v32i16_unary(<32 x i16> %x0) #0 {
; CHECK-LABEL: define <32 x i16> @shuffle_vpermv3_v32i16_unary(
; CHECK-SAME: <32 x i16> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> , <32 x i16> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> , <32 x i16> [[X0]])
; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[R]]
;
  %r = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> , <32 x i16> %x0)
  ret <32 x i16> %r
}

define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %m) #0 {
; CHECK-LABEL: define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(
; CHECK-SAME: <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]], <32 x i16> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <32 x i16> [[M]], splat (i16 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <32 x i16> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i16> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <32 x i16> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <32 x i16> [[TMP9]] to <32 x i5>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP6]], <32 x i16> [[T]], <32 x i16> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i5> [[TMP10]] to i160
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> [[T]], <32 x i16> [[X1]])
; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[R]]
;
  %t = or <32 x i16> %m,
  %r = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %t, <32 x i16> %x1)
  ret <32 x i16> %r
}

;
; vXi8
;

define <16 x i8> @shuffle_vpermv3_v16i8(<16 x i8> %x0, <16 x i8> %x1) #0 {
; CHECK-LABEL: define <16 x i8> @shuffle_vpermv3_v16i8(
; CHECK-SAME: <16 x i8> [[X0:%.*]], <16 x i8> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[TMP1]], <16 x i8> , <16 x i8> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> , <16 x i8> [[X1]])
; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[R]]
;
  %r = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> , <16 x i8> %x1)
  ret <16 x i8> %r
}

define <16 x i8> @shuffle_vpermv3_v16i8_unary(<16 x i8> %x0) #0 {
; CHECK-LABEL: define <16 x i8> @shuffle_vpermv3_v16i8_unary(
; CHECK-SAME: <16 x i8> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[TMP1]], <16 x i8> , <16 x i8> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> , <16 x i8> [[X0]])
; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[R]]
;
  %r = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> , <16 x i8> %x0)
  ret <16 x i8> %r
}

define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %m) #0 {
; CHECK-LABEL: define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(
; CHECK-SAME: <16 x i8> [[X0:%.*]], <16 x i8> [[X1:%.*]], <16 x i8> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <16 x i8> [[M]], splat (i8 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <16 x i8> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i8> [[TMP9]] to <16 x i4>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[TMP6]], <16 x i8> [[T]], <16 x i8> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i4> [[TMP10]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> [[T]], <16 x i8> [[X1]])
; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[R]]
;
  %t = or <16 x i8> %m,
  %r = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %t, <16 x i8> %x1)
  ret <16 x i8> %r
}

define <32 x i8> @shuffle_vpermv3_v32i8(<32 x i8> %x0, <32 x i8> %x1) #0 {
; CHECK-LABEL: define <32 x i8> @shuffle_vpermv3_v32i8(
; CHECK-SAME: <32 x i8> [[X0:%.*]], <32 x i8> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[TMP1]], <32 x i8> , <32 x i8> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> , <32 x i8> [[X1]])
; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[R]]
;
  %r = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> , <32 x i8> %x1)
  ret <32 x i8> %r
}

define <32 x i8> @shuffle_vpermv3_v32i8_unary(<32 x i8> %x0) #0 {
; CHECK-LABEL: define <32 x i8> @shuffle_vpermv3_v32i8_unary(
; CHECK-SAME: <32 x i8> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[TMP1]], <32 x i8> , <32 x i8> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> , <32 x i8> [[X0]])
; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[R]]
;
  %r = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> , <32 x i8> %x0)
  ret <32 x i8> %r
}

define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %m) #0 {
; CHECK-LABEL: define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(
; CHECK-SAME: <32 x i8> [[X0:%.*]], <32 x i8> [[X1:%.*]], <32 x i8> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <32 x i8> [[M]], splat (i8 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <32 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i8> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <32 x i8> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <32 x i8> [[TMP9]] to <32 x i5>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[TMP6]], <32 x i8> [[T]], <32 x i8> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i5> [[TMP10]] to i160
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> [[T]], <32 x i8> [[X1]])
; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[R]]
;
  %t = or <32 x i8> %m,
  %r = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %t, <32 x i8> %x1)
  ret <32 x i8> %r
}

define <64 x i8> @shuffle_vpermv3_v64i8(<64 x i8> %x0, <64 x i8> %x1) #0 {
; CHECK-LABEL: define <64 x i8> @shuffle_vpermv3_v64i8(
; CHECK-SAME: <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[TMP1]], <64 x i8> , <64 x i8> [[TMP2]])
; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> , <64 x i8> [[X1]])
; CHECK-NEXT: store <64 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[R]]
;
  %r = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> , <64 x i8> %x1)
  ret <64 x i8> %r
}

define <64 x i8> @shuffle_vpermv3_v64i8_unary(<64 x i8> %x0) #0 {
; CHECK-LABEL: define <64 x i8> @shuffle_vpermv3_v64i8_unary(
; CHECK-SAME: <64 x i8> [[X0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[TMP1]], <64 x i8> , <64 x i8> [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> , <64 x i8> [[X0]])
; CHECK-NEXT: store <64 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[R]]
;
  %r = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> , <64 x i8> %x0)
  ret <64 x i8> %r
}

define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %m) #0 {
; CHECK-LABEL: define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(
; CHECK-SAME: <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i8> [[M:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP1:%.*]] = xor <64 x i8> [[M]], splat (i8 -1)
; CHECK-NEXT: [[TMP5:%.*]] = and <64 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = and <64 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = and <64 x i8> [[TMP4]],
; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <64 x i8> [[M]],
; CHECK-NEXT: [[TMP10:%.*]] = trunc <64 x i8> [[TMP9]] to <64 x i6>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[TMP6]], <64 x i8> [[T]], <64 x i8> [[TMP3]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <64 x i6> [[TMP10]] to i384
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i384 [[TMP12]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> [[T]], <64 x i8> [[X1]])
; CHECK-NEXT: store <64 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[R]]
;
  %t = or <64 x i8> %m,
  %r = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %t, <64 x i8> %x1)
  ret <64 x i8> %r
}

attributes #0 = { sanitize_memory }
;.
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
;.