; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 ; VNNI FP16 define <16 x float> @test_mm512_dpph_ps(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B) { ; CHECK-LABEL: test_mm512_dpph_ps: ; CHECK: # %bb.0: ; CHECK-NEXT: vdpphps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0x52,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = tail call <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B) ret <16 x float> %res } define <16 x float> @test_mm512_mask_dpph_ps(<16 x float> %__W, i16 zeroext %__U, <32 x half> %__A, <32 x half> %__B) { ; X86-LABEL: test_mm512_mask_dpph_ps: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vdpphps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x52,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_dpph_ps: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vdpphps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x52,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dph = tail call <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x float> %dph, <16 x float> %__W ret <16 x float> %res } define <16 x float> @test_mm512_maskz_dpph_ps(i16 zeroext %__U, <16 x float> %__W, <32 x half> %__A, <32 x half> %__B) { ; X86-LABEL: test_mm512_maskz_dpph_ps: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vdpphps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x52,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_maskz_dpph_ps: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vdpphps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x52,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dph = tail call <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x float> %dph, <16 x float> zeroinitializer ret <16 x float> %res } declare <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float>, <32 x half>, <32 x half>) ; VNNI INT8 define <16 x i32> @test_mm512_dpbssd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) { ; X86-LABEL: test_mm512_dpbssd_epi32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpdpbssd (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x77,0x48,0x50,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_dpbssd_epi32: ; X64: # %bb.0: ; X64-NEXT: vpdpbssd (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x77,0x48,0x50,0x07] ; X64-NEXT: retq # encoding: [0xc3] %__B = load <16 x i32>, ptr %pB %res = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) ret <16 x i32> %res } define <16 x i32> @test_mm512_mask_dpbssds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_mask_dpbssds_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpbssds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x51,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_dpbssds_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpbssds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x51,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W ret <16 x i32> %res } define <16 x i32> @test_mm512_maskz_dpbssd_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_maskz_dpbssd_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpbssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x77,0xc9,0x50,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_maskz_dpbssd_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpbssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x77,0xc9,0x50,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer ret <16 x i32> %res } declare <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32>, <16 x i32>, <16 x i32>) declare <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32>, <16 x i32>, <16 x i32>) define <16 x i32> @test_mm512_dpbsud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) { ; X86-LABEL: test_mm512_dpbsud_epi32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpdpbsud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0x50,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_dpbsud_epi32: ; X64: # %bb.0: ; X64-NEXT: vpdpbsud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0x50,0x07] ; X64-NEXT: retq # encoding: [0xc3] %__B = load <16 x i32>, ptr %pB %res = tail call <16 x i32> @llvm.x86.avx10.vpdpbsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) ret <16 x i32> %res } define <16 x i32> @test_mm512_mask_dpbsuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_mask_dpbsuds_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpbsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x51,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_dpbsuds_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpbsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x51,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbsuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W ret <16 x i32> %res } define <16 x i32> @test_mm512_maskz_dpbsud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_maskz_dpbsud_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpbsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x50,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_maskz_dpbsud_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpbsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x50,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer ret <16 x i32> %res } declare <16 x i32> @llvm.x86.avx10.vpdpbsud.512(<16 x i32>, <16 x i32>, <16 x i32>) declare <16 x i32> @llvm.x86.avx10.vpdpbsuds.512(<16 x i32>, <16 x i32>, <16 x i32>) define <16 x i32> @test_mm512_dpbuud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) { ; X86-LABEL: test_mm512_dpbuud_epi32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpdpbuud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0x50,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_dpbuud_epi32: ; X64: # %bb.0: ; X64-NEXT: vpdpbuud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0x50,0x07] ; X64-NEXT: retq # encoding: [0xc3] %__B = load <16 x i32>, ptr %pB %res = tail call <16 x i32> @llvm.x86.avx10.vpdpbuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) ret <16 x i32> %res } define <16 x i32> @test_mm512_mask_dpbuuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_mask_dpbuuds_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpbuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x51,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_dpbuuds_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpbuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x51,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbuuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W ret <16 x i32> %res } define <16 x i32> @test_mm512_maskz_dpbuud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_maskz_dpbuud_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpbuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x50,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_maskz_dpbuud_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpbuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x50,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer ret <16 x i32> %res } declare <16 x i32> @llvm.x86.avx10.vpdpbuud.512(<16 x i32>, <16 x i32>, <16 x i32>) declare <16 x i32> @llvm.x86.avx10.vpdpbuuds.512(<16 x i32>, <16 x i32>, <16 x i32>) ; VNNI INT16 define <16 x i32> @test_mm512_dpwsud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) { ; X86-LABEL: test_mm512_dpwsud_epi32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpdpwsud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0xd2,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_dpwsud_epi32: ; X64: # %bb.0: ; X64-NEXT: vpdpwsud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0xd2,0x07] ; X64-NEXT: retq # encoding: [0xc3] %__B = load <16 x i32>, ptr %pB %res = tail call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) ret <16 x i32> %res } define <16 x i32> @test_mm512_mask_dpwsuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_mask_dpwsuds_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpwsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0xd3,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_dpwsuds_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpwsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0xd3,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W ret <16 x i32> %res } define <16 x i32> @test_mm512_maskz_dpwsud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_maskz_dpwsud_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpwsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0xd2,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_maskz_dpwsud_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpwsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0xd2,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer ret <16 x i32> %res } declare <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32>, <16 x i32>, <16 x i32>) declare <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32>, <16 x i32>, <16 x i32>) define <16 x i32> @test_mm512_dpwusd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) { ; X86-LABEL: test_mm512_dpwusd_epi32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpdpwusd (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xd2,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_dpwusd_epi32: ; X64: # %bb.0: ; X64-NEXT: vpdpwusd (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xd2,0x07] ; X64-NEXT: retq # encoding: [0xc3] %__B = load <16 x i32>, ptr %pB %res = tail call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) ret <16 x i32> %res } define <16 x i32> @test_mm512_mask_dpwusds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_mask_dpwusds_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpwusds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0xd3,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_dpwusds_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpwusds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0xd3,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W ret <16 x i32> %res } define <16 x i32> @test_mm512_maskz_dpwusd_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_maskz_dpwusd_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpwusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xd2,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_maskz_dpwusd_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpwusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xd2,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer ret <16 x i32> %res } declare <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32>, <16 x i32>, <16 x i32>) declare <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32>, <16 x i32>, <16 x i32>) define <16 x i32> @test_mm512_dpwuud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) { ; X86-LABEL: test_mm512_dpwuud_epi32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpdpwuud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0xd2,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_dpwuud_epi32: ; X64: # %bb.0: ; X64-NEXT: vpdpwuud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0xd2,0x07] ; X64-NEXT: retq # encoding: [0xc3] %__B = load <16 x i32>, ptr %pB %res = tail call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) ret <16 x i32> %res } define <16 x i32> @test_mm512_mask_dpwuuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_mask_dpwuuds_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpwuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0xd3,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_dpwuuds_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpwuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0xd3,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W ret <16 x i32> %res } define <16 x i32> @test_mm512_maskz_dpwuud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) { ; X86-LABEL: test_mm512_maskz_dpwuud_epi32: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpdpwuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0xd2,0xc2] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_maskz_dpwuud_epi32: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpdpwuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0xd2,0xc2] ; X64-NEXT: retq # encoding: [0xc3] %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) %bst = bitcast i16 %__U to <16 x i1> %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer ret <16 x i32> %res } declare <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32>, <16 x i32>, <16 x i32>) declare <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32>, <16 x i32>, <16 x i32>) ; VMPSADBW define { <32 x i16>, <32 x i16>, <32 x i16> } @test_mm512_mask_mpsadbw(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { ; X86-LABEL: test_mm512_mask_mpsadbw: ; X86: # %bb.0: ; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] ; X86-NEXT: vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02] ; X86-NEXT: vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03] ; X86-NEXT: vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04] ; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm512_mask_mpsadbw: ; X64: # %bb.0: ; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02] ; X64-NEXT: vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03] ; X64-NEXT: vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04] ; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc] ; X64-NEXT: retq # encoding: [0xc3] %msk = bitcast i32 %x4 to <32 x i1> %rs1 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 2) %ad2 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 3) %rs2 = select <32 x i1> %msk, <32 x i16> %ad2, <32 x i16> %x3 %ad3 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 4) %rs3 = select <32 x i1> %msk, <32 x i16> %ad3, <32 x i16> zeroinitializer %rs4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } undef, <32 x i16> %rs1, 0 %rs5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs4, <32 x i16> %rs2, 1 %rs6 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs5, <32 x i16> %rs3, 2 ret { <32 x i16>, <32 x i16>, <32 x i16> } %rs6 } declare <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8>, <64 x i8>, i8) ; Regression test define <8 x float> @avx_dp_ps(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: avx_dp_ps: ; CHECK: # %bb.0: ; CHECK-NEXT: vdpps $255, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0xff] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %r = tail call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a, <8 x float> %b, i8 -1) ret <8 x float> %r }