Diffstat (limited to 'llvm/test/CodeGen/X86/midpoint-int-vec-512.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/midpoint-int-vec-512.ll | 160 |
1 file changed, 80 insertions, 80 deletions
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
index 5f6337e2..a4750b4 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
@@ -507,58 +507,58 @@ define <32 x i16> @vec512_i16_signed_mem_reg(ptr %a1_addr, <32 x i16> %a2) nounw
 define <32 x i16> @vec512_i16_signed_reg_mem(<32 x i16> %a1, ptr %a2_addr) nounwind {
 ; AVX512F-LABEL: vec512_i16_signed_reg_mem:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %ymm1
-; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm4
-; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm5
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT: vmovdqa (%rdi), %ymm2
+; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
+; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm4
+; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm5
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512F-NEXT: vpminsw %ymm2, %ymm3, %ymm5
-; AVX512F-NEXT: vpmaxsw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpminsw %ymm3, %ymm1, %ymm5
+; AVX512F-NEXT: vpmaxsw %ymm3, %ymm1, %ymm3
+; AVX512F-NEXT: vpsubw %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpminsw %ymm2, %ymm0, %ymm5
+; AVX512F-NEXT: vpmaxsw %ymm2, %ymm0, %ymm2
 ; AVX512F-NEXT: vpsubw %ymm5, %ymm2, %ymm2
-; AVX512F-NEXT: vpminsw %ymm1, %ymm0, %ymm5
-; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm1
-; AVX512F-NEXT: vpsubw %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
 ; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm5
 ; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
+; AVX512F-NEXT: vpsubw %ymm3, %ymm6, %ymm3
 ; AVX512F-NEXT: vpsubw %ymm2, %ymm6, %ymm2
-; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} zmm1 = zmm5 ^ (zmm4 & (zmm1 ^ zmm5))
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vpaddw %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpaddw %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpternlogq {{.*#+}} zmm2 = zmm5 ^ (zmm4 & (zmm2 ^ zmm5))
+; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512F-NEXT: vpaddw %ymm1, %ymm3, %ymm1
+; AVX512F-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_reg_mem:
 ; AVX512VL-FALLBACK: # %bb.0:
-; AVX512VL-FALLBACK-NEXT: vmovdqa (%rdi), %ymm1
-; AVX512VL-FALLBACK-NEXT: vmovdqa 32(%rdi), %ymm2
-; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm4
-; AVX512VL-FALLBACK-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm5
+; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-FALLBACK-NEXT: vmovdqa (%rdi), %ymm2
+; AVX512VL-FALLBACK-NEXT: vmovdqa 32(%rdi), %ymm3
+; AVX512VL-FALLBACK-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm5
 ; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512VL-FALLBACK-NEXT: vpminsw %ymm2, %ymm3, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpmaxsw %ymm2, %ymm3, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpminsw %ymm3, %ymm1, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpmaxsw %ymm3, %ymm1, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpminsw %ymm2, %ymm0, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpmaxsw %ymm2, %ymm0, %ymm2
 ; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpminsw %ymm1, %ymm0, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpmaxsw %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsubw %ymm5, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm5
 ; AVX512VL-FALLBACK-NEXT: vpxor %xmm6, %xmm6, %xmm6
+; AVX512VL-FALLBACK-NEXT: vpsubw %ymm3, %ymm6, %ymm3
 ; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm6, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm6, %ymm1
-; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq {{.*#+}} zmm1 = zmm5 ^ (zmm4 & (zmm1 ^ zmm5))
-; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
-; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
+; AVX512VL-FALLBACK-NEXT: vpternlogq {{.*#+}} zmm2 = zmm5 ^ (zmm4 & (zmm2 ^ zmm5))
+; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpaddw %ymm1, %ymm3, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-FALLBACK-NEXT: retq
 ;
 ; AVX512BW-LABEL: vec512_i16_signed_reg_mem:
@@ -939,66 +939,66 @@ define <64 x i8> @vec512_i8_signed_mem_reg(ptr %a1_addr, <64 x i8> %a2) nounwind
 define <64 x i8> @vec512_i8_signed_reg_mem(<64 x i8> %a1, ptr %a2_addr) nounwind {
 ; AVX512F-LABEL: vec512_i8_signed_reg_mem:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %ymm1
-; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm4
-; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm5
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT: vmovdqa (%rdi), %ymm2
+; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4
+; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm5
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512F-NEXT: vpminsb %ymm2, %ymm3, %ymm5
-; AVX512F-NEXT: vpmaxsb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpminsb %ymm3, %ymm1, %ymm5
+; AVX512F-NEXT: vpmaxsb %ymm3, %ymm1, %ymm3
+; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpminsb %ymm2, %ymm0, %ymm5
+; AVX512F-NEXT: vpmaxsb %ymm2, %ymm0, %ymm2
 ; AVX512F-NEXT: vpsubb %ymm5, %ymm2, %ymm2
-; AVX512F-NEXT: vpminsb %ymm1, %ymm0, %ymm5
-; AVX512F-NEXT: vpmaxsb %ymm1, %ymm0, %ymm1
-; AVX512F-NEXT: vpsubb %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
 ; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm5
 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
 ; AVX512F-NEXT: vpandq %zmm6, %zmm5, %zmm5
-; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
 ; AVX512F-NEXT: vpxor %xmm7, %xmm7, %xmm7
+; AVX512F-NEXT: vpsubb %ymm3, %ymm7, %ymm3
+; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT: vpsubb %ymm2, %ymm7, %ymm2
-; AVX512F-NEXT: vpand %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT: vpsubb %ymm1, %ymm7, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} zmm1 = zmm5 ^ (zmm4 & (zmm1 ^ zmm5))
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpternlogq {{.*#+}} zmm2 = zmm5 ^ (zmm4 & (zmm2 ^ zmm5))
+; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1
+; AVX512F-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-FALLBACK-LABEL: vec512_i8_signed_reg_mem:
 ; AVX512VL-FALLBACK: # %bb.0:
-; AVX512VL-FALLBACK-NEXT: vmovdqa (%rdi), %ymm1
-; AVX512VL-FALLBACK-NEXT: vmovdqa 32(%rdi), %ymm2
-; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm4
-; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm5
+; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-FALLBACK-NEXT: vmovdqa (%rdi), %ymm2
+; AVX512VL-FALLBACK-NEXT: vmovdqa 32(%rdi), %ymm3
+; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm5
 ; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
-; AVX512VL-FALLBACK-NEXT: vpminsb %ymm2, %ymm3, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpmaxsb %ymm2, %ymm3, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpminsb %ymm3, %ymm1, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpmaxsb %ymm3, %ymm1, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpsubb %ymm5, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpminsb %ymm2, %ymm0, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpmaxsb %ymm2, %ymm0, %ymm2
 ; AVX512VL-FALLBACK-NEXT: vpsubb %ymm5, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpminsb %ymm1, %ymm0, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsubb %ymm5, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm5
 ; AVX512VL-FALLBACK-NEXT: vpbroadcastd {{.*#+}} zmm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
 ; AVX512VL-FALLBACK-NEXT: vpandq %zmm6, %zmm5, %zmm5
-; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm3, %ymm3
 ; AVX512VL-FALLBACK-NEXT: vpxor %xmm7, %xmm7, %xmm7
+; AVX512VL-FALLBACK-NEXT: vpsubb %ymm3, %ymm7, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm2, %ymm2
 ; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm7, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm7, %ymm1
-; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogq {{.*#+}} zmm1 = zmm5 ^ (zmm4 & (zmm1 ^ zmm5))
-; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpaddb %ymm3, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
-; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
+; AVX512VL-FALLBACK-NEXT: vpternlogq {{.*#+}} zmm2 = zmm5 ^ (zmm4 & (zmm2 ^ zmm5))
+; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpaddb %ymm1, %ymm3, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-FALLBACK-NEXT: retq
 ;
 ; AVX512BW-LABEL: vec512_i8_signed_reg_mem: