; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefixes=CHECK,BF16
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2 | FileCheck %s --check-prefixes=CHECK,AVX10_2
;
; Codegen expectations for conversions between integers and bfloat on x86-64.
; The file is compiled twice (see the two llc invocations above):
;   * with -mattr=+avx512bf16,+avx512vl, verified through the CHECK and BF16 prefixes
;   * with -mattr=+avx10.2, verified through the CHECK and AVX10_2 prefixes
; A function whose expected assembly is the same in both configurations carries
; only the shared CHECK prefix; otherwise it carries one BF16 block followed by
; one AVX10_2 block.
; The assertions are machine-generated (first line of this file): after changing
; the IR, regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.
; Every function takes a single argument %a, performs one conversion into %cvt
; and returns it.

;
; Signed Integer to BFloat
;
; Every expectation in this section first produces f32 and then narrows it with
; vcvtneps2bf16; the functions differ in how the integer -> f32 step is done.

; Scalar i32 source: vcvtsi2ss reads the 32-bit argument register.
define bfloat @sitofp_i32_to_bf16(i32 %a) {
; CHECK-LABEL: sitofp_i32_to_bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; CHECK-NEXT: retq
  %cvt = sitofp i32 %a to bfloat
  ret bfloat %cvt
}

; Scalar i64 source: vcvtsi2ss reads the 64-bit argument register.
define bfloat @sitofp_i64_to_bf16(i64 %a) {
; CHECK-LABEL: sitofp_i64_to_bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; CHECK-NEXT: retq
  %cvt = sitofp i64 %a to bfloat
  ret bfloat %cvt
}

; <8 x i32> source: one packed vcvtdq2ps on a ymm register.
define <8 x bfloat> @sitofp_v8i32_to_v8bf16(<8 x i32> %a) {
; CHECK-LABEL: sitofp_v8i32_to_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %cvt = sitofp <8 x i32> %a to <8 x bfloat>
  ret <8 x bfloat> %cvt
}

; <4 x i32> source: the xmm argument is used as a ymm register (see the kill
; annotation) and then follows the same sequence as the 8-element case.
define <4 x bfloat> @sitofp_v4i32_to_v4bf16(<4 x i32> %a) {
; CHECK-LABEL: sitofp_v4i32_to_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %cvt = sitofp <4 x i32> %a to <4 x bfloat>
  ret <4 x bfloat> %cvt
}

; <2 x i32> source: same expected sequence as the <4 x i32> case.
define <2 x bfloat> @sitofp_v2i32_to_v2bf16(<2 x i32> %a) {
; CHECK-LABEL: sitofp_v2i32_to_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %cvt = sitofp <2 x i32> %a to <2 x bfloat>
  ret <2 x bfloat> %cvt
}

; <16 x i32> source: packed conversion on zmm, narrowed into ymm.
define <16 x bfloat> @sitofp_v16i32_to_v16bf16(<16 x i32> %a) {
; CHECK-LABEL: sitofp_v16i32_to_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: vcvtneps2bf16 %zmm0, %ymm0
; CHECK-NEXT: retq
  %cvt = sitofp <16 x i32> %a to <16 x bfloat>
  ret <16 x bfloat> %cvt
}

; <2 x i64> source: the two configurations diverge. Under the BF16 prefix each
; element is moved to a general-purpose register, converted on its own with
; vcvtsi2ss + vcvtneps2bf16, and the 16-bit results are reassembled with
; vpblendw/vpinsrw. Under the AVX10_2 prefix a single packed vcvtqq2ps is
; expected on the argument viewed as a zmm register.
define <2 x bfloat> @sitofp_v2i64_to_v2bf16(<2 x i64> %a) {
; BF16-LABEL: sitofp_v2i64_to_v2bf16:
; BF16: # %bb.0:
; BF16-NEXT: vpextrq $1, %xmm0, %rax
; BF16-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %eax
; BF16-NEXT: vmovq %xmm0, %rcx
; BF16-NEXT: vcvtsi2ss %rcx, %xmm15, %xmm0
; BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; BF16-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BF16-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: sitofp_v2i64_to_v2bf16:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX10_2-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX10_2-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = sitofp <2 x i64> %a to <2 x bfloat>
  ret <2 x bfloat> %cvt
}

; <4 x i64> source: same split as the <2 x i64> case, with four per-element
; conversions under the BF16 prefix.
define <4 x bfloat> @sitofp_v4i64_to_v4bf16(<4 x i64> %a) {
; BF16-LABEL: sitofp_v4i64_to_v4bf16:
; BF16: # %bb.0:
; BF16-NEXT: vextracti128 $1, %ymm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rax
; BF16-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %eax
; BF16-NEXT: vmovq %xmm1, %rcx
; BF16-NEXT: vcvtsi2ss %rcx, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %ecx
; BF16-NEXT: vpextrq $1, %xmm0, %rdx
; BF16-NEXT: vcvtsi2ss %rdx, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %edx
; BF16-NEXT: vmovq %xmm0, %rsi
; BF16-NEXT: vcvtsi2ss %rsi, %xmm15, %xmm0
; BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; BF16-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BF16-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; BF16-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
; BF16-NEXT: vzeroupper
; BF16-NEXT: retq
;
; AVX10_2-LABEL: sitofp_v4i64_to_v4bf16:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX10_2-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX10_2-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = sitofp <4 x i64> %a to <4 x bfloat>
  ret <4 x bfloat> %cvt
}

; <8 x i64> source: eight per-element conversions under the BF16 prefix (the
; 128-bit pieces of zmm0 are extracted one at a time); a single vcvtqq2ps under
; the AVX10_2 prefix.
define <8 x bfloat> @sitofp_v8i64_to_v8bf16(<8 x i64> %a) {
; BF16-LABEL: sitofp_v8i64_to_v8bf16:
; BF16: # %bb.0:
; BF16-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rax
; BF16-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %eax
; BF16-NEXT: vmovq %xmm1, %rcx
; BF16-NEXT: vcvtsi2ss %rcx, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %ecx
; BF16-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rdx
; BF16-NEXT: vcvtsi2ss %rdx, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %edx
; BF16-NEXT: vmovq %xmm1, %rsi
; BF16-NEXT: vcvtsi2ss %rsi, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %esi
; BF16-NEXT: vextracti128 $1, %ymm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rdi
; BF16-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %edi
; BF16-NEXT: vmovq %xmm1, %r8
; BF16-NEXT: vcvtsi2ss %r8, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %r8d
; BF16-NEXT: vpextrq $1, %xmm0, %r9
; BF16-NEXT: vcvtsi2ss %r9, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovq %xmm0, %r9
; BF16-NEXT: vcvtsi2ss %r9, %xmm15, %xmm0
; BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; BF16-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BF16-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $3, %edi, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $4, %esi, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; BF16-NEXT: vzeroupper
; BF16-NEXT: retq
;
; AVX10_2-LABEL: sitofp_v8i64_to_v8bf16:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX10_2-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = sitofp <8 x i64> %a to <8 x bfloat>
  ret <8 x bfloat> %cvt
}

;
; Unsigned Integer to BFloat
;
; Mirrors the signed section function by function; the expected sequences are
; the same except that the unsigned conversion instructions are used
; (vcvtusi2ss, vcvtudq2ps, vcvtuqq2ps).

; Scalar i32 source.
define bfloat @uitofp_i32_to_bf16(i32 %a) {
; CHECK-LABEL: uitofp_i32_to_bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; CHECK-NEXT: retq
  %cvt = uitofp i32 %a to bfloat
  ret bfloat %cvt
}

; Scalar i64 source.
define bfloat @uitofp_i64_to_bf16(i64 %a) {
; CHECK-LABEL: uitofp_i64_to_bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; CHECK-NEXT: retq
  %cvt = uitofp i64 %a to bfloat
  ret bfloat %cvt
}

; <8 x i32> source: one packed vcvtudq2ps on a ymm register.
define <8 x bfloat> @uitofp_v8i32_to_v8bf16(<8 x i32> %a) {
; CHECK-LABEL: uitofp_v8i32_to_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %cvt = uitofp <8 x i32> %a to <8 x bfloat>
  ret <8 x bfloat> %cvt
}

; <4 x i32> source: the xmm argument is used as a ymm register (see the kill
; annotation) and then follows the same sequence as the 8-element case.
define <4 x bfloat> @uitofp_v4i32_to_v4bf16(<4 x i32> %a) {
; CHECK-LABEL: uitofp_v4i32_to_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %cvt = uitofp <4 x i32> %a to <4 x bfloat>
  ret <4 x bfloat> %cvt
}

; <2 x i32> source: same expected sequence as the <4 x i32> case.
define <2 x bfloat> @uitofp_v2i32_to_v2bf16(<2 x i32> %a) {
; CHECK-LABEL: uitofp_v2i32_to_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %cvt = uitofp <2 x i32> %a to <2 x bfloat>
  ret <2 x bfloat> %cvt
}

; <16 x i32> source: packed conversion on zmm, narrowed into ymm.
define <16 x bfloat> @uitofp_v16i32_to_v16bf16(<16 x i32> %a) {
; CHECK-LABEL: uitofp_v16i32_to_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT: vcvtneps2bf16 %zmm0, %ymm0
; CHECK-NEXT: retq
  %cvt = uitofp <16 x i32> %a to <16 x bfloat>
  ret <16 x bfloat> %cvt
}

; <2 x i64> source: per-element vcvtusi2ss + vcvtneps2bf16 under the BF16
; prefix; a single packed vcvtuqq2ps on the argument viewed as zmm under the
; AVX10_2 prefix.
define <2 x bfloat> @uitofp_v2i64_to_v2bf16(<2 x i64> %a) {
; BF16-LABEL: uitofp_v2i64_to_v2bf16:
; BF16: # %bb.0:
; BF16-NEXT: vpextrq $1, %xmm0, %rax
; BF16-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %eax
; BF16-NEXT: vmovq %xmm0, %rcx
; BF16-NEXT: vcvtusi2ss %rcx, %xmm15, %xmm0
; BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; BF16-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BF16-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: uitofp_v2i64_to_v2bf16:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX10_2-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX10_2-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = uitofp <2 x i64> %a to <2 x bfloat>
  ret <2 x bfloat> %cvt
}

; <4 x i64> source: same split as the <2 x i64> case, with four per-element
; conversions under the BF16 prefix.
define <4 x bfloat> @uitofp_v4i64_to_v4bf16(<4 x i64> %a) {
; BF16-LABEL: uitofp_v4i64_to_v4bf16:
; BF16: # %bb.0:
; BF16-NEXT: vextracti128 $1, %ymm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rax
; BF16-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %eax
; BF16-NEXT: vmovq %xmm1, %rcx
; BF16-NEXT: vcvtusi2ss %rcx, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %ecx
; BF16-NEXT: vpextrq $1, %xmm0, %rdx
; BF16-NEXT: vcvtusi2ss %rdx, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %edx
; BF16-NEXT: vmovq %xmm0, %rsi
; BF16-NEXT: vcvtusi2ss %rsi, %xmm15, %xmm0
; BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; BF16-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BF16-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; BF16-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
; BF16-NEXT: vzeroupper
; BF16-NEXT: retq
;
; AVX10_2-LABEL: uitofp_v4i64_to_v4bf16:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX10_2-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX10_2-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = uitofp <4 x i64> %a to <4 x bfloat>
  ret <4 x bfloat> %cvt
}

; <8 x i64> source: eight per-element conversions under the BF16 prefix; a
; single vcvtuqq2ps under the AVX10_2 prefix.
define <8 x bfloat> @uitofp_v8i64_to_v8bf16(<8 x i64> %a) {
; BF16-LABEL: uitofp_v8i64_to_v8bf16:
; BF16: # %bb.0:
; BF16-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rax
; BF16-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %eax
; BF16-NEXT: vmovq %xmm1, %rcx
; BF16-NEXT: vcvtusi2ss %rcx, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %ecx
; BF16-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rdx
; BF16-NEXT: vcvtusi2ss %rdx, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %edx
; BF16-NEXT: vmovq %xmm1, %rsi
; BF16-NEXT: vcvtusi2ss %rsi, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %esi
; BF16-NEXT: vextracti128 $1, %ymm0, %xmm1
; BF16-NEXT: vpextrq $1, %xmm1, %rdi
; BF16-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm2
; BF16-NEXT: vcvtneps2bf16 %xmm2, %xmm2
; BF16-NEXT: vmovd %xmm2, %edi
; BF16-NEXT: vmovq %xmm1, %r8
; BF16-NEXT: vcvtusi2ss %r8, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovd %xmm1, %r8d
; BF16-NEXT: vpextrq $1, %xmm0, %r9
; BF16-NEXT: vcvtusi2ss %r9, %xmm15, %xmm1
; BF16-NEXT: vcvtneps2bf16 %xmm1, %xmm1
; BF16-NEXT: vmovq %xmm0, %r9
; BF16-NEXT: vcvtusi2ss %r9, %xmm15, %xmm0
; BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
; BF16-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BF16-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $3, %edi, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $4, %esi, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; BF16-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; BF16-NEXT: vzeroupper
; BF16-NEXT: retq
;
; AVX10_2-LABEL: uitofp_v8i64_to_v8bf16:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX10_2-NEXT: vcvtneps2bf16 %ymm0, %xmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = uitofp <8 x i64> %a to <8 x bfloat>
  ret <8 x bfloat> %cvt
}

;
; BFloat to Signed Integer
;
; The expectations in this section first place each bfloat's 16 bits in the
; upper half of a 32-bit lane (shll/vpslld by 16, or a word interleave with a
; zeroed register) and then apply a truncating f32 -> integer conversion
; (vcvttss2si, vcvttps2dq or vcvttps2qq).

; Scalar bfloat -> i32: the two configurations differ only in the instruction
; that reads the 16-bit value out of xmm0 (vpextrw $0 vs. vmovw).
define i32 @fptosi_bf16_to_i32(bfloat %a) {
; BF16-LABEL: fptosi_bf16_to_i32:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $0, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2si %xmm0, %eax
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptosi_bf16_to_i32:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vmovw %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm0
; AVX10_2-NEXT: vcvttss2si %xmm0, %eax
; AVX10_2-NEXT: retq
  %cvt = fptosi bfloat %a to i32
  ret i32 %cvt
}

; Scalar bfloat -> i64: as the i32 case, with a 64-bit destination register.
define i64 @fptosi_bf16_to_i64(bfloat %a) {
; BF16-LABEL: fptosi_bf16_to_i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $0, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2si %xmm0, %rax
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptosi_bf16_to_i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vmovw %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm0
; AVX10_2-NEXT: vcvttss2si %xmm0, %rax
; AVX10_2-NEXT: retq
  %cvt = fptosi bfloat %a to i64
  ret i64 %cvt
}

; <8 x bfloat> -> <8 x i32>: zero-extend words to dwords, shift left by 16,
; then one packed vcvttps2dq on ymm.
define <8 x i32> @fptosi_v8bf16_to_v8i32(<8 x bfloat> %a) {
; CHECK-LABEL: fptosi_v8bf16_to_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpslld $16, %ymm0, %ymm0
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: retq
  %cvt = fptosi <8 x bfloat> %a to <8 x i32>
  ret <8 x i32> %cvt
}

; <4 x bfloat> -> <4 x i32>: interleave with a zeroed register so that each
; bfloat lands in the upper word of its dword, then vcvttps2dq on xmm.
define <4 x i32> @fptosi_v4bf16_to_v4i32(<4 x bfloat> %a) {
; CHECK-LABEL: fptosi_v4bf16_to_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: retq
  %cvt = fptosi <4 x bfloat> %a to <4 x i32>
  ret <4 x i32> %cvt
}

; <2 x bfloat> -> <2 x i32>: both configurations build the two f32 lanes
; through a general-purpose register and vinsertps and then use one packed
; vcvttps2dq. They differ in the instruction that reads element 0 (vmovd vs.
; vmovw) and in the order in which the two elements are processed.
define <2 x i32> @fptosi_v2bf16_to_v2i32(<2 x bfloat> %a) {
; BF16-LABEL: fptosi_v2bf16_to_v2i32:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; BF16-NEXT: vcvttps2dq %xmm0, %xmm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptosi_v2bf16_to_v2i32:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vmovw %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm1
; AVX10_2-NEXT: vpextrw $1, %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm0
; AVX10_2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX10_2-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX10_2-NEXT: retq
  %cvt = fptosi <2 x bfloat> %a to <2 x i32>
  ret <2 x i32> %cvt
}

; <16 x bfloat> -> <16 x i32>: the <8 x bfloat> sequence at zmm width.
define <16 x i32> @fptosi_v16bf16_to_v16i32(<16 x bfloat> %a) {
; CHECK-LABEL: fptosi_v16bf16_to_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NEXT: vpslld $16, %zmm0, %zmm0
; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = fptosi <16 x bfloat> %a to <16 x i32>
  ret <16 x i32> %cvt
}

; <2 x bfloat> -> <2 x i64>: under the BF16 prefix each element is converted
; on its own with vcvttss2si into rax and the two results are joined with
; vpunpcklqdq; under the AVX10_2 prefix one packed vcvttps2qq is expected and
; only the low xmm part of its zmm result is returned.
define <2 x i64> @fptosi_v2bf16_to_v2i64(<2 x bfloat> %a) {
; BF16-LABEL: fptosi_v2bf16_to_v2i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vcvttss2si %xmm1, %rax
; BF16-NEXT: vmovq %rax, %xmm1
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2si %xmm0, %rax
; BF16-NEXT: vmovq %rax, %xmm0
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptosi_v2bf16_to_v2i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX10_2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX10_2-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX10_2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = fptosi <2 x bfloat> %a to <2 x i64>
  ret <2 x i64> %cvt
}

; <4 x bfloat> -> <4 x i64>: four per-element conversions joined with
; vpunpcklqdq/vinserti128 under the BF16 prefix; one packed vcvttps2qq under
; the AVX10_2 prefix, of which the low ymm part is returned.
define <4 x i64> @fptosi_v4bf16_to_v4i64(<4 x bfloat> %a) {
; BF16-LABEL: fptosi_v4bf16_to_v4i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $3, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vcvttss2si %xmm1, %rax
; BF16-NEXT: vmovq %rax, %xmm1
; BF16-NEXT: vpextrw $2, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2si %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2si %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2si %xmm0, %rax
; BF16-NEXT: vmovq %rax, %xmm0
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; BF16-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptosi_v4bf16_to_v4i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX10_2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX10_2-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX10_2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX10_2-NEXT: retq
  %cvt = fptosi <4 x bfloat> %a to <4 x i64>
  ret <4 x i64> %cvt
}

; <8 x bfloat> -> <8 x i64>: eight per-element conversions assembled into a
; zmm register under the BF16 prefix; zero-extend, shift and one packed
; vcvttps2qq under the AVX10_2 prefix.
define <8 x i64> @fptosi_v8bf16_to_v8i64(<8 x bfloat> %a) {
; BF16-LABEL: fptosi_v8bf16_to_v8i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $7, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vcvttss2si %xmm1, %rax
; BF16-NEXT: vmovq %rax, %xmm1
; BF16-NEXT: vpextrw $6, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2si %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; BF16-NEXT: vpextrw $5, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2si %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpextrw $4, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm3
; BF16-NEXT: vcvttss2si %xmm3, %rax
; BF16-NEXT: vmovq %rax, %xmm3
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; BF16-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; BF16-NEXT: vpextrw $3, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2si %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpextrw $2, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm3
; BF16-NEXT: vcvttss2si %xmm3, %rax
; BF16-NEXT: vmovq %rax, %xmm3
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm3
; BF16-NEXT: vcvttss2si %xmm3, %rax
; BF16-NEXT: vmovq %rax, %xmm3
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2si %xmm0, %rax
; BF16-NEXT: vmovq %rax, %xmm0
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; BF16-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; BF16-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptosi_v8bf16_to_v8i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX10_2-NEXT: vpslld $16, %ymm0, %ymm0
; AVX10_2-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX10_2-NEXT: retq
  %cvt = fptosi <8 x bfloat> %a to <8 x i64>
  ret <8 x i64> %cvt
}

;
; BFloat to Unsigned Integer
;
; Mirrors the signed section function by function; the expected sequences are
; the same except that the unsigned truncating conversions are used
; (vcvttss2usi, vcvttps2udq, vcvttps2uqq).

; Scalar bfloat -> i32: the two configurations differ only in the instruction
; that reads the 16-bit value out of xmm0 (vpextrw $0 vs. vmovw).
define i32 @fptoui_bf16_to_i32(bfloat %a) {
; BF16-LABEL: fptoui_bf16_to_i32:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $0, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2usi %xmm0, %eax
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptoui_bf16_to_i32:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vmovw %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm0
; AVX10_2-NEXT: vcvttss2usi %xmm0, %eax
; AVX10_2-NEXT: retq
  %cvt = fptoui bfloat %a to i32
  ret i32 %cvt
}

; Scalar bfloat -> i64: as the i32 case, with a 64-bit destination register.
define i64 @fptoui_bf16_to_i64(bfloat %a) {
; BF16-LABEL: fptoui_bf16_to_i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $0, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2usi %xmm0, %rax
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptoui_bf16_to_i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vmovw %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm0
; AVX10_2-NEXT: vcvttss2usi %xmm0, %rax
; AVX10_2-NEXT: retq
  %cvt = fptoui bfloat %a to i64
  ret i64 %cvt
}

; <8 x bfloat> -> <8 x i32>: zero-extend words to dwords, shift left by 16,
; then one packed vcvttps2udq on ymm.
define <8 x i32> @fptoui_v8bf16_to_v8i32(<8 x bfloat> %a) {
; CHECK-LABEL: fptoui_v8bf16_to_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpslld $16, %ymm0, %ymm0
; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT: retq
  %cvt = fptoui <8 x bfloat> %a to <8 x i32>
  ret <8 x i32> %cvt
}

; <4 x bfloat> -> <4 x i32>: interleave with a zeroed register so that each
; bfloat lands in the upper word of its dword, then vcvttps2udq on xmm.
define <4 x i32> @fptoui_v4bf16_to_v4i32(<4 x bfloat> %a) {
; CHECK-LABEL: fptoui_v4bf16_to_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0
; CHECK-NEXT: retq
  %cvt = fptoui <4 x bfloat> %a to <4 x i32>
  ret <4 x i32> %cvt
}

; <2 x bfloat> -> <2 x i32>: both configurations build the two f32 lanes
; through a general-purpose register and vinsertps and then use one packed
; vcvttps2udq. They differ in the instruction that reads element 0 (vmovd vs.
; vmovw) and in the order in which the two elements are processed.
define <2 x i32> @fptoui_v2bf16_to_v2i32(<2 x bfloat> %a) {
; BF16-LABEL: fptoui_v2bf16_to_v2i32:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; BF16-NEXT: vcvttps2udq %xmm0, %xmm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptoui_v2bf16_to_v2i32:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vmovw %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm1
; AVX10_2-NEXT: vpextrw $1, %xmm0, %eax
; AVX10_2-NEXT: shll $16, %eax
; AVX10_2-NEXT: vmovd %eax, %xmm0
; AVX10_2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX10_2-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX10_2-NEXT: retq
  %cvt = fptoui <2 x bfloat> %a to <2 x i32>
  ret <2 x i32> %cvt
}

; <16 x bfloat> -> <16 x i32>: the <8 x bfloat> sequence at zmm width.
define <16 x i32> @fptoui_v16bf16_to_v16i32(<16 x bfloat> %a) {
; CHECK-LABEL: fptoui_v16bf16_to_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NEXT: vpslld $16, %zmm0, %zmm0
; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT: retq
  %cvt = fptoui <16 x bfloat> %a to <16 x i32>
  ret <16 x i32> %cvt
}

; <2 x bfloat> -> <2 x i64>: under the BF16 prefix each element is converted
; on its own with vcvttss2usi into rax and the two results are joined with
; vpunpcklqdq; under the AVX10_2 prefix one packed vcvttps2uqq is expected and
; only the low xmm part of its zmm result is returned.
define <2 x i64> @fptoui_v2bf16_to_v2i64(<2 x bfloat> %a) {
; BF16-LABEL: fptoui_v2bf16_to_v2i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vcvttss2usi %xmm1, %rax
; BF16-NEXT: vmovq %rax, %xmm1
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2usi %xmm0, %rax
; BF16-NEXT: vmovq %rax, %xmm0
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptoui_v2bf16_to_v2i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX10_2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX10_2-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX10_2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX10_2-NEXT: vzeroupper
; AVX10_2-NEXT: retq
  %cvt = fptoui <2 x bfloat> %a to <2 x i64>
  ret <2 x i64> %cvt
}

; <4 x bfloat> -> <4 x i64>: four per-element conversions joined with
; vpunpcklqdq/vinserti128 under the BF16 prefix; one packed vcvttps2uqq under
; the AVX10_2 prefix, of which the low ymm part is returned.
define <4 x i64> @fptoui_v4bf16_to_v4i64(<4 x bfloat> %a) {
; BF16-LABEL: fptoui_v4bf16_to_v4i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $3, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vcvttss2usi %xmm1, %rax
; BF16-NEXT: vmovq %rax, %xmm1
; BF16-NEXT: vpextrw $2, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2usi %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2usi %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2usi %xmm0, %rax
; BF16-NEXT: vmovq %rax, %xmm0
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; BF16-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptoui_v4bf16_to_v4i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX10_2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX10_2-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX10_2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX10_2-NEXT: retq
  %cvt = fptoui <4 x bfloat> %a to <4 x i64>
  ret <4 x i64> %cvt
}

; <8 x bfloat> -> <8 x i64>: eight per-element conversions assembled into a
; zmm register under the BF16 prefix; zero-extend, shift and one packed
; vcvttps2uqq under the AVX10_2 prefix.
define <8 x i64> @fptoui_v8bf16_to_v8i64(<8 x bfloat> %a) {
; BF16-LABEL: fptoui_v8bf16_to_v8i64:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $7, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm1
; BF16-NEXT: vcvttss2usi %xmm1, %rax
; BF16-NEXT: vmovq %rax, %xmm1
; BF16-NEXT: vpextrw $6, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2usi %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; BF16-NEXT: vpextrw $5, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2usi %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpextrw $4, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm3
; BF16-NEXT: vcvttss2usi %xmm3, %rax
; BF16-NEXT: vmovq %rax, %xmm3
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; BF16-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; BF16-NEXT: vpextrw $3, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm2
; BF16-NEXT: vcvttss2usi %xmm2, %rax
; BF16-NEXT: vmovq %rax, %xmm2
; BF16-NEXT: vpextrw $2, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm3
; BF16-NEXT: vcvttss2usi %xmm3, %rax
; BF16-NEXT: vmovq %rax, %xmm3
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; BF16-NEXT: vpextrw $1, %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm3
; BF16-NEXT: vcvttss2usi %xmm3, %rax
; BF16-NEXT: vmovq %rax, %xmm3
; BF16-NEXT: vmovd %xmm0, %eax
; BF16-NEXT: shll $16, %eax
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vcvttss2usi %xmm0, %rax
; BF16-NEXT: vmovq %rax, %xmm0
; BF16-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; BF16-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; BF16-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; BF16-NEXT: retq
;
; AVX10_2-LABEL: fptoui_v8bf16_to_v8i64:
; AVX10_2: # %bb.0:
; AVX10_2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX10_2-NEXT: vpslld $16, %ymm0, %ymm0
; AVX10_2-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX10_2-NEXT: retq
  %cvt = fptoui <8 x bfloat> %a to <8 x i64>
  ret <8 x i64> %cvt
}