; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=arm64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI

declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_uabd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %abd
}

define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_uaba_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %aba = add <8 x i8> %lhs, %abd
  ret <8 x i8> %aba
}

define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_sabd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %abd
}

define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_saba_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %aba = add <8 x i8> %lhs, %abd
  ret <8 x i8> %aba
}

declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_uabd_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %abd
}

define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_uaba_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  %aba = add <16 x i8> %lhs, %abd
  ret <16 x i8> %aba
}

define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_sabd_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %abd
}

define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_saba_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  %aba = add <16 x i8> %lhs, %abd
  ret <16 x i8> %aba
}

declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_uabd_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %abd
}

define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_uaba_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %aba = add <4 x i16> %lhs, %abd
  ret <4 x i16> %aba
}

define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_sabd_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %abd
}

define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_saba_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %aba = add <4 x i16> %lhs, %abd
  ret <4 x i16> %aba
}

declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_uabd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %abd
}

define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_uaba_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  %aba = add <8 x i16> %lhs, %abd
  ret <8 x i16> %aba
}

define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_sabd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %abd
}

define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_saba_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  %aba = add <8 x i16> %lhs, %abd
  ret <8 x i16> %aba
}

declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_uabd_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %abd
}

define <2 x i32>
@test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_uaba_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  %aba = add <2 x i32> %lhs, %abd
  ret <2 x i32> %aba
}

define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_sabd_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %abd
}

define <2 x i32> @test_sabd_v2i32_const() {
; CHECK-SD-LABEL: test_sabd_v2i32_const:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI19_0
; CHECK-SD-NEXT:    ldr d0, [x8, :lo12:.LCPI19_0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_sabd_v2i32_const:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI19_1
; CHECK-GI-NEXT:    adrp x9, .LCPI19_0
; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI19_1]
; CHECK-GI-NEXT:    ldr d1, [x9, :lo12:.LCPI19_0]
; CHECK-GI-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT:    ret
; NOTE(review): the two constant vector operands of this call were dropped
; when the file was mangled (the source read "sabd.v2i32( <2 x i32> ,
; <2 x i32> )"). The values below were restored from the upstream copy of
; this test — confirm against llvm/test/CodeGen/AArch64 before relying on it.
  %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
                 <2 x i32> <i32 -2147483648, i32 2147450880>,
                 <2 x i32> <i32 -65536, i32 65535>)
  ret <2 x i32> %1
}

define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_saba_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  %aba = add <2 x i32> %lhs, %abd
  ret <2 x i32> %aba
}

declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_uabd_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %abd
}

define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_uaba_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  %aba = add <4 x i32> %lhs, %abd
  ret <4 x i32> %aba
}

define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_sabd_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %abd
}

define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_saba_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  %aba = add <4 x i32> %lhs, %abd
  ret <4 x i32> %aba
}

declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>)

define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: test_fabd_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %abd = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %abd
}

declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>)

define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: test_fabd_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %abd = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %abd
}

declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>)

define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: test_fabd_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fabd v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %abd
}

define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x
; NOTE(review): Everything from here to end-of-file was damaged by whatever
; collapsed this test onto a handful of physical lines. Two distinct problems:
; (1) the newlines are gone, so the IR instructions and the FileCheck
; directives below run together on a few giant lines; (2) the literal
; constant-vector operands of the and/or/xor/shl/lshr instructions and the
; shufflevector index masks were dropped outright — e.g. "%and1 = and
; <8 x i16> %lhs," has no right-hand operand, and "shufflevector ... ,
; <8 x i32>" has no mask. The per-lane constants for the sabd knownbits
; tests live in constant pools (.LCPI31_*, .LCPI32_*, ...) whose contents are
; not printed in the assertions, so they cannot be reconstructed reliably from
; this file alone. Restore this section from the upstream LLVM test this file
; appears to be a copy of (the AArch64 neon aba/abd intrinsic test) and re-run
; utils/update_llc_test_checks.py rather than hand-editing the assertions.
i16> %lhs, <8 x i16> %rhs) { ; CHECK-SD-LABEL: test_uabd_knownbits_vec8i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi v2.8h, #15 ; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: rev64 v0.8h, v0.8h ; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_uabd_knownbits_vec8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v2.8h, #15 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: rev64 v0.8h, v0.8h ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: ret %and1 = and <8 x i16> %lhs, %and2 = and <8 x i16> %rhs, %uabd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %and1, <8 x i16> %and2) %suff = shufflevector <8 x i16> %uabd, <8 x i16> undef, <8 x i32> %res = and <8 x i16> %suff, ret <8 x i16> %res } define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-SD-LABEL: knownbits_uabd_mask_and_shuffle_lshr: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 ; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #17 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: knownbits_uabd_mask_and_shuffle_lshr: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: rev64 v0.4s, v0.4s ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17 ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = and <4 x i32> %a1, %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2) %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> %5 = lshr <4 x i32> %4, ret <4 x i32> %5 } define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) { ; 
CHECK-SD-LABEL: knownbits_mask_and_shuffle_lshr: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: knownbits_mask_and_shuffle_lshr: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v2.4s, #127, msl #8 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17 ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = and <4 x i32> %a1, %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2) %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> %5 = lshr <4 x i32> %4, ret <4 x i32> %5 } define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-SD-LABEL: test_sabd_knownbits_vec4i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI31_0 ; CHECK-SD-NEXT: adrp x9, .LCPI31_1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI31_0] ; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI31_1] ; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b ; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-SD-NEXT: mov v0.s[1], v0.s[0] ; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s ; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_sabd_knownbits_vec4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI31_2 ; CHECK-GI-NEXT: adrp x9, .LCPI31_1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_2] ; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI31_1] ; CHECK-GI-NEXT: adrp x8, .LCPI31_0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_0] ; CHECK-GI-NEXT: movi v3.2d, #0x0000ff000000ff ; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-GI-NEXT: ret %and1 = and <4 x i32> %lhs, %and2 = and <4 x i32> %rhs, %abd = call 
<4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %and1, <4 x i32> %and2) %s = shufflevector <4 x i32> %abd, <4 x i32> undef, <4 x i32> %4 = and <4 x i32> %s, ret <4 x i32> %4 } define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-SD-LABEL: knownbits_sabd_and_mask: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI32_0 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI32_0] ; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: knownbits_sabd_and_mask: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI32_1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_1] ; CHECK-GI-NEXT: adrp x8, .LCPI32_0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_0] ; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = and <4 x i32> %a1, %3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %1, <4 x i32> %2) %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> ret <4 x i32> %4 } define <4 x i32> @knownbits_sabd_and_or_mask(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-SD-LABEL: knownbits_sabd_and_or_mask: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: knownbits_sabd_and_or_mask: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI33_1 ; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1] ; CHECK-GI-NEXT: adrp x8, .LCPI33_0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] ; CHECK-GI-NEXT: orr v0.16b, v0.16b, v3.16b ; CHECK-GI-NEXT: orr v1.16b, v1.16b, v3.16b ; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b 
}, v2.16b ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = or <4 x i32> %1, %3 = and <4 x i32> %a1, %4 = or <4 x i32> %3, %5 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %2, <4 x i32> %4) %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> ret <4 x i32> %6 } define <4 x i32> @knownbits_sabd_and_xor_mask(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-SD-LABEL: knownbits_sabd_and_xor_mask: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI34_0 ; CHECK-SD-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] ; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: eor v0.16b, v0.16b, v3.16b ; CHECK-SD-NEXT: eor v1.16b, v1.16b, v3.16b ; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: knownbits_sabd_and_xor_mask: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI34_1 ; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_1] ; CHECK-GI-NEXT: adrp x8, .LCPI34_0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NEXT: eor v0.16b, v0.16b, v3.16b ; CHECK-GI-NEXT: eor v1.16b, v1.16b, v3.16b ; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = xor <4 x i32> %1, %3 = and <4 x i32> %a1, %4 = xor <4 x i32> %3, %5 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %2, <4 x i32> %4) %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> ret <4 x i32> %6 } define <4 x i32> @knownbits_sabd_and_shl_mask(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-SD-LABEL: knownbits_sabd_and_shl_mask: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: knownbits_sabd_and_shl_mask: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI35_1 ; 
CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_1] ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NEXT: shl v0.4s, v0.4s, #17 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #17 ; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = shl <4 x i32> %1, %3 = and <4 x i32> %a1, %4 = shl <4 x i32> %3, %5 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %2, <4 x i32> %4) %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> ret <4 x i32> %6 } define <4 x i32> @knownbits_sabd_and_mul_mask(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-SD-LABEL: knownbits_sabd_and_mul_mask: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI36_0 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] ; CHECK-SD-NEXT: and v3.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: and v2.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: mul v0.4s, v0.4s, v3.4s ; CHECK-SD-NEXT: mul v1.4s, v1.4s, v2.4s ; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: mov v0.s[1], v0.s[0] ; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: knownbits_sabd_and_mul_mask: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI36_1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_1] ; CHECK-GI-NEXT: adrp x8, .LCPI36_0 ; CHECK-GI-NEXT: and v3.16b, v0.16b, v2.16b ; CHECK-GI-NEXT: and v2.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: mul v0.4s, v0.4s, v3.4s ; CHECK-GI-NEXT: mul v1.4s, v1.4s, v2.4s ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] ; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = mul <4 x i32> %a0, %1 %3 = and <4 x i32> %a1, %4 = mul <4 x i32> %a1, %3 %5 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %2, <4 x i32> %4) %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> ret <4 x i32> %6 }