; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for sqshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu1d_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i64_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i32_constant
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn1s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_vscalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_scalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_vscalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift_m1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra_scalar
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d_imm0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli16b
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8h
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2d
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_zero_shift_amount
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_trunc_v2i64_v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_trunc_v2i64_v2i8
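; Note: with -global-isel-abort=2, every function listed in the warning block
; above currently falls back from GlobalISel to SelectionDAG, so the CHECK-GI
; assertions for those functions still describe SelectionDAG output.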
define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sqshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sqshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sqshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sqshl d0, d0, d1
; CHECK-NEXT:    ret
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshl1d_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    sqshl d0, d0, #1
; CHECK-NEXT:    ret
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    ldr x9, [x1]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    fmov d1, x9
; CHECK-NEXT:    sqshl d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @sqshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshl_scalar_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    sqshl d0, d0, #1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uqshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uqshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uqshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sqshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sqshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sqshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sqshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uqshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uqshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uqshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uqshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uqshl d0, d0, d1
; CHECK-NEXT:    ret
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: uqshl1d_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    uqshl d0, d0, #1
; CHECK-NEXT:    ret
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    ldr x9, [x1]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    fmov d1, x9
; CHECK-NEXT:    uqshl d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @uqshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: uqshl_scalar_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    uqshl d0, d0, #1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone

declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    srshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    srshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    srshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    srshl d0, d0, d1
; CHECK-NEXT:    ret
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @srshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: srshl1d_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    srshl d0, d0, d1
; CHECK-NEXT:    ret
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @srshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl_scalar:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    ldr x9, [x1]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    fmov d1, x9
; CHECK-NEXT:    srshl d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @srshl_scalar_constant(ptr %A) nounwind {
; CHECK-LABEL: srshl_scalar_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x9, [x0]
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    fmov d0, x9
; CHECK-NEXT:    srshl d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
;
CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: urshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: urshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: urshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl1d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: urshl d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } define <1 x i64> @urshl1d_constant(ptr %A) nounwind { ; CHECK-LABEL: urshl1d_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: urshl d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @urshl_scalar(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl_scalar: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: urshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp2 = load i64, ptr %B %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2) ret i64 %tmp3 } define i64 @urshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: urshl_scalar_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: urshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1) ret i64 %tmp3 } define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: srshl16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: srshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: srshl8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: srshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: srshl4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: srshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = 
load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: srshl2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: srshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: urshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: urshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: urshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: urshl2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: urshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> 
@llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: sqrshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: sqrshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: sqrshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: uqrshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: uqrshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: uqrshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqrshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqrshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqrshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } define <2 x i64> @sqrshl2d(ptr %A, ptr 
%B) nounwind { ; CHECK-LABEL: sqrshl2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqrshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl1d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: sqrshl d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind { ; CHECK-LABEL: sqrshl1d_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: sqrshl d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqrshl_scalar: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: sqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp2 = load i64, ptr %B %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2) ret i64 %tmp3 } define i64 @sqrshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: sqrshl_scalar_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: sqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1) ret i64 %tmp3 } define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqrshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqrshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqrshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqrshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl1d: ; CHECK: 
// %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: uqrshl d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind { ; CHECK-LABEL: uqrshl1d_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: uqrshl d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: uqrshl_scalar: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: uqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp2 = load i64, ptr %B %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2) ret i64 %tmp3 } define i64 @uqrshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: uqrshl_scalar_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: uqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1) ret i64 %tmp3 } declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @urshr8b(ptr %A) nounwind { ; CHECK-SD-LABEL: urshr8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: urshr v0.8b, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: urshr8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret 
<8 x i8> %tmp3 } define <4 x i16> @urshr4h(ptr %A) nounwind { ; CHECK-SD-LABEL: urshr4h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: urshr v0.4h, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: urshr4h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @urshr2s(ptr %A) nounwind { ; CHECK-SD-LABEL: urshr2s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: urshr v0.2s, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: urshr2s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @urshr16b(ptr %A) nounwind { ; CHECK-SD-LABEL: urshr16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: urshr v0.16b, v0.16b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: urshr16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @urshr8h(ptr %A) nounwind { ; CHECK-SD-LABEL: urshr8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: urshr v0.8h, v0.8h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: urshr8h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @urshr4s(ptr %A) nounwind { ; CHECK-SD-LABEL: urshr4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: urshr v0.4s, v0.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: urshr4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @urshr2d(ptr %A) nounwind { ; CHECK-SD-LABEL: urshr2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: urshr v0.2d, v0.2d, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: urshr2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <1 x i64> @urshr1d(ptr %A) nounwind { ; CHECK-LABEL: urshr1d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: urshr d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @urshr_scalar(ptr %A) nounwind { ; CHECK-LABEL: urshr_scalar: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: urshr d0, 
d0, #1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1) ret i64 %tmp3 } define <8 x i8> @srshr8b(ptr %A) nounwind { ; CHECK-SD-LABEL: srshr8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: srshr v0.8b, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srshr8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @srshr4h(ptr %A) nounwind { ; CHECK-SD-LABEL: srshr4h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: srshr v0.4h, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srshr4h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @srshr2s(ptr %A) nounwind { ; CHECK-SD-LABEL: srshr2s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: srshr v0.2s, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srshr2s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @srshr16b(ptr %A) nounwind { ; CHECK-SD-LABEL: srshr16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: srshr v0.16b, v0.16b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srshr16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @srshr8h(ptr %A) nounwind { ; CHECK-SD-LABEL: srshr8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: srshr v0.8h, v0.8h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srshr8h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @srshr4s(ptr %A) nounwind { ; CHECK-SD-LABEL: srshr4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: srshr v0.4s, v0.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srshr4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @srshr2d(ptr %A) nounwind { ; CHECK-SD-LABEL: srshr2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: srshr v0.2d, v0.2d, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srshr2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] 
; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <1 x i64> @srshr1d(ptr %A) nounwind { ; CHECK-LABEL: srshr1d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: srshr d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @srshr_scalar(ptr %A) nounwind { ; CHECK-LABEL: srshr_scalar: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: srshr d0, d0, #1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1) ret i64 %tmp3 } define <8 x i8> @sqshlu8b(ptr %A) nounwind { ; CHECK-LABEL: sqshlu8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: sqshlu v0.8b, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @sqshlu4h(ptr %A) nounwind { ; CHECK-LABEL: sqshlu4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: sqshlu v0.4h, v0.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @sqshlu2s(ptr %A) nounwind { ; CHECK-LABEL: sqshlu2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: sqshlu v0.2s, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @sqshlu16b(ptr %A) nounwind { ; CHECK-LABEL: sqshlu16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshlu v0.16b, v0.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @sqshlu8h(ptr %A) nounwind { ; CHECK-LABEL: sqshlu8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshlu v0.8h, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @sqshlu4s(ptr %A) nounwind { ; CHECK-LABEL: sqshlu4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshlu v0.4s, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @sqshlu2d(ptr %A) nounwind { ; CHECK-LABEL: sqshlu2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshlu v0.2d, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind { ; CHECK-LABEL: sqshlu1d_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: sqshlu d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @sqshlu_i64_constant(ptr %A) nounwind { ; CHECK-LABEL: sqshlu_i64_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: sqshlu d0, d0, #1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 
@llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1) ret i64 %tmp3 } define i32 @sqshlu_i32_constant(ptr %A) nounwind { ; CHECK-LABEL: sqshlu_i32_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: sqshlu s0, s0, #1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp1 = load i32, ptr %A %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1) ret i32 %tmp3 } declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @rshrn8b(ptr %A) nounwind { ; CHECK-LABEL: rshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: rshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } define <4 x i16> @rshrn4h(ptr %A) nounwind { ; CHECK-LABEL: rshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: rshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } define <2 x i32> @rshrn2s(ptr %A) nounwind { ; CHECK-LABEL: rshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: rshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: rshrn16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: rshrn8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: rshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: rshrn4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone 
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone define <8 x i8> @shrn8b(ptr %A) nounwind { ; CHECK-LABEL: shrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: shrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = lshr <8 x i16> %tmp1, %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> ret <8 x i8> %tmp3 } define <4 x i16> @shrn4h(ptr %A) nounwind { ; CHECK-LABEL: shrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: shrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = lshr <4 x i32> %tmp1, %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> ret <4 x i16> %tmp3 } define <2 x i32> @shrn2s(ptr %A) nounwind { ; CHECK-LABEL: shrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: shrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = lshr <2 x i64> %tmp1, %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> ret <2 x i32> %tmp3 } define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: shrn16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: shrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = lshr <8 x i16> %tmp1, %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: shrn8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: shrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = lshr <4 x i32> %tmp1, %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: shrn4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: shrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = lshr <2 x i64> %tmp1, %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqshrn1s(i64 %A) nounwind { ; CHECK-LABEL: sqshrn1s: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: sqshrn s0, d0, #1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1) ret i32 %tmp } define <8 x i8> @sqshrn8b(ptr %A) nounwind { ; CHECK-LABEL: sqshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } define <4 x i16> @sqshrn4h(ptr %A) nounwind { ; CHECK-LABEL: sqshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } define <2 x i32> @sqshrn2s(ptr %A) nounwind { ; CHECK-LABEL: sqshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] 
; CHECK-NEXT: sqshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqshrn16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqshrn8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqshrn4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqshrun1s(i64 %A) nounwind { ; CHECK-LABEL: sqshrun1s: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: sqshrun s0, d0, #1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1) ret i32 %tmp } define <8 x i8> @sqshrun8b(ptr %A) nounwind { ; CHECK-LABEL: sqshrun8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshrun v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } define <4 x i16> @sqshrun4h(ptr %A) nounwind { ; CHECK-LABEL: sqshrun4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshrun v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } define <2 x i32> @sqshrun2s(ptr %A) nounwind { ; CHECK-LABEL: sqshrun2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqshrun v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqshrun16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqshrun8h: ; CHECK: // 
%bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqshrun4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone define i32 @sqrshrn1s(i64 %A) nounwind { ; CHECK-LABEL: sqrshrn1s: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: sqrshrn s0, d0, #1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1) ret i32 %tmp } define <8 x i8> @sqrshrn8b(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqrshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } define <4 x i16> @sqrshrn4h(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqrshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } define <2 x i32> @sqrshrn2s(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sqrshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqrshrn16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqrshrn8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: sqrshrn4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: sqrshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) 
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @sqrshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrun1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqrshrun s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqrshrun8b(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshrun4h(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqrshrun2s(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @uqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: uqrshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: uqrshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}
define <8 x i8> @uqrshrn8b(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uqrshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } define <4 x i16> @uqrshrn4h(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uqrshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } define <2 x i32> @uqrshrn2s(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uqrshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: uqrshrn16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: uqrshrn8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: uqrshrn4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone define i32 @uqshrn1s(i64 %A) nounwind { ; CHECK-LABEL: uqshrn1s: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: uqshrn s0, d0, #1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1) ret i32 %tmp } define <8 x i8> @uqshrn8b(ptr %A) nounwind { ; CHECK-LABEL: uqshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uqshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) ret <8 x i8> %tmp3 } define <4 x i16> @uqshrn4h(ptr %A) nounwind { ; CHECK-LABEL: uqshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uqshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) ret <4 x i16> %tmp3 } define <2 x i32> @uqshrn2s(ptr %A) nounwind { ; CHECK-LABEL: uqshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr 
q0, [x0] ; CHECK-NEXT: uqshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) ret <2 x i32> %tmp3 } define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: uqshrn16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> ret <16 x i8> %tmp4 } define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: uqshrn8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> ret <8 x i16> %tmp4 } define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK-LABEL: uqshrn4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> ret <4 x i32> %tmp4 } declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone define <8 x i16> @ushll8h(ptr %A) nounwind { ; CHECK-LABEL: ushll8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ushll v0.8h, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> %tmp3 = shl <8 x i16> %tmp2, ret <8 x i16> %tmp3 } define <4 x i32> @ushll4s(ptr %A) nounwind { ; CHECK-LABEL: ushll4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ushll v0.4s, v0.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = shl <4 x i32> %tmp2, ret <4 x i32> %tmp3 } define <2 x i64> @ushll2d(ptr %A) nounwind { ; CHECK-LABEL: ushll2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ushll v0.2d, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> %tmp3 = shl <2 x i64> %tmp2, ret <2 x i64> %tmp3 } define <8 x i16> @ushll2_8h(ptr %A) nounwind { ; CHECK-SD-LABEL: ushll2_8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0, #8] ; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ushll2_8h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr q0, [x0] ; CHECK-GI-NEXT: mov d0, v0.d[1] ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #1 ; CHECK-GI-NEXT: ret %load1 = load <16 x i8>, ptr %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> %tmp3 = shl <8 x i16> %tmp2, ret <8 x i16> %tmp3 } define <4 x i32> @ushll2_4s(ptr %A) nounwind { ; CHECK-SD-LABEL: ushll2_4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0, #8] ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ushll2_4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr q0, [x0] ; 
CHECK-GI-NEXT: mov d0, v0.d[1] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #1 ; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = shl <4 x i32> %tmp2, ret <4 x i32> %tmp3 } define <2 x i64> @ushll2_2d(ptr %A) nounwind { ; CHECK-SD-LABEL: ushll2_2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0, #8] ; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ushll2_2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr q0, [x0] ; CHECK-GI-NEXT: mov d0, v0.d[1] ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1 ; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> %tmp3 = shl <2 x i64> %tmp2, ret <2 x i64> %tmp3 } declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>) declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>) declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>) declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>) declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>) declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64) define <8 x i16> @neon_ushll8h_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_ushll8h_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_ushll8h_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: movi v1.8h, #1 ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: ushl v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> ) ret <8 x i16> %tmp3 } define <8 x i16> @neon_ushl8h_no_constant_shift(ptr %A) nounwind { ; CHECK-LABEL: neon_ushl8h_no_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushl v0.8h, v0.8h, v0.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } define <4 x i32> @neon_ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_ushl8h_constant_shift_extend_not_2x: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr s0, [x0] ; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_ushl8h_constant_shift_extend_not_2x: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr w8, [x0] ; CHECK-GI-NEXT: movi v0.4s, #1 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: uxtb w8, w8 ; CHECK-GI-NEXT: mov b2, v1.b[2] ; CHECK-GI-NEXT: mov b3, v1.b[1] ; CHECK-GI-NEXT: mov b4, v1.b[3] ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: fmov w9, s2 ; CHECK-GI-NEXT: fmov w10, s3 ; CHECK-GI-NEXT: fmov w11, s4 ; CHECK-GI-NEXT: uxtb w9, w9 ; CHECK-GI-NEXT: uxtb w10, w10 ; CHECK-GI-NEXT: uxtb w11, w11 ; CHECK-GI-NEXT: fmov s2, w9 ; CHECK-GI-NEXT: mov v1.h[1], w10 ; CHECK-GI-NEXT: mov v2.h[1], w11 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] ; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i8>, ptr %A %tmp2 = zext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret 
<4 x i32> %tmp3 } define <8 x i16> @neon_ushl8_noext_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_ushl8_noext_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_ushl8_noext_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.8h, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: ushl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @neon_ushll4s_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_ushll4s_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_ushll4s_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: movi v1.4s, #1 ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } ; FIXME: unnecessary ushll.4s v0, v0, #0? define <4 x i32> @neon_ushll4s_neg_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_ushll4s_neg_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_ushll4s_neg_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } ; FIXME: should be constant folded. 
define <4 x i32> @neon_ushll4s_constant_fold() nounwind { ; CHECK-SD-LABEL: neon_ushll4s_constant_fold: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI160_0 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI160_0] ; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_ushll4s_constant_fold: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.4s, #1 ; CHECK-GI-NEXT: adrp x8, .LCPI160_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI160_0] ; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @neon_ushll2d_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_ushll2d_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_ushll2d_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: adrp x8, .LCPI161_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI161_0] ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) nounwind { ; CHECK-LABEL: neon_ushl_vscalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i32>, ptr %A %tmp2 = zext <1 x i32> %tmp1 to <1 x i64> %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind { ; CHECK-LABEL: neon_ushl_scalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i32, ptr %A %tmp2 = zext i32 %tmp1 to i64 %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1) ret i64 %tmp3 } define <8 x i16> @sshll8h(ptr %A) nounwind { ; CHECK-LABEL: sshll8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: sshll v0.8h, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> %tmp3 = shl <8 x i16> %tmp2, ret <8 x i16> %tmp3 } define <2 x i64> @sshll2d(ptr %A) nounwind { ; CHECK-LABEL: sshll2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: sshll v0.2d, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> %tmp3 = shl <2 x i64> %tmp2, ret <2 x i64> %tmp3 } declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>) declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>) declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>) declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>) declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>) declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64) define <16 x i8> @neon_sshl16b_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshl16b_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl16b_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.16b, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: sshl v0.16b, 
v1.16b, v0.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } define <16 x i8> @neon_sshl16b_non_splat_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshl16b_non_splat_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI167_0 ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI167_0] ; CHECK-SD-NEXT: sshl v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl16b_non_splat_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI167_0 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI167_0] ; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } define <16 x i8> @neon_sshl16b_neg_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshl16b_neg_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl16b_neg_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.16b, #254 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } define <8 x i16> @neon_sshll8h_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshll8h_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshll8h_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: movi v1.8h, #1 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @neon_sshl4s_wrong_ext_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshl4s_wrong_ext_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr s0, [x0] ; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl4s_wrong_ext_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr w8, [x0] ; CHECK-GI-NEXT: movi v0.4s, #1 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: sxtb w8, w8 ; CHECK-GI-NEXT: mov b2, v1.b[2] ; CHECK-GI-NEXT: mov b3, v1.b[1] ; CHECK-GI-NEXT: mov b4, v1.b[3] ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: fmov w9, s2 ; CHECK-GI-NEXT: fmov w10, s3 ; CHECK-GI-NEXT: fmov w11, s4 ; CHECK-GI-NEXT: sxtb w9, w9 ; CHECK-GI-NEXT: sxtb w10, w10 ; CHECK-GI-NEXT: sxtb w11, w11 ; CHECK-GI-NEXT: fmov s2, w9 ; CHECK-GI-NEXT: mov v1.h[1], w10 ; CHECK-GI-NEXT: mov v2.h[1], w11 ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] ; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i8>, ptr %A %tmp2 = sext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } define <4 x i32> @neon_sshll4s_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshll4s_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: sshll v0.4s, 
v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshll4s_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: movi v1.4s, #1 ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } define <4 x i32> @neon_sshll4s_neg_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshll4s_neg_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshll4s_neg_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } ; FIXME: should be constant folded. define <4 x i32> @neon_sshl4s_constant_fold() nounwind { ; CHECK-SD-LABEL: neon_sshl4s_constant_fold: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI173_0 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI173_0] ; CHECK-SD-NEXT: shl v0.4s, v0.4s, #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl4s_constant_fold: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.4s, #2 ; CHECK-GI-NEXT: adrp x8, .LCPI173_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI173_0] ; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> ) ret <4 x i32> %tmp3 } define <4 x i32> @neon_sshl4s_no_fold(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshl4s_no_fold: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl4s_no_fold: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.4s, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @neon_sshll2d_constant_shift(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshll2d_constant_shift: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshll2d_constant_shift: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr d0, [x0] ; CHECK-GI-NEXT: adrp x8, .LCPI175_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI175_0] ; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind { ; CHECK-LABEL: neon_sshll_vscalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i32>, ptr %A %tmp2 = zext <1 x i32> %tmp1 to <1 x i64> %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> ) ret <1 x i64> %tmp3 } define i64 @neon_sshll_scalar_constant_shift(ptr %A) 
nounwind { ; CHECK-LABEL: neon_sshll_scalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i32, ptr %A %tmp2 = zext i32 %tmp1 to i64 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1) ret i64 %tmp3 } define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind { ; CHECK-LABEL: neon_sshll_scalar_constant_shift_m1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: sshr d0, d0, #1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %tmp1 = load i32, ptr %A %tmp2 = zext i32 %tmp1 to i64 %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1) ret i64 %tmp3 } ; FIXME: should be constant folded. define <2 x i64> @neon_sshl2d_constant_fold() nounwind { ; CHECK-SD-LABEL: neon_sshl2d_constant_fold: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI179_0 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI179_0] ; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl2d_constant_fold: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI179_1 ; CHECK-GI-NEXT: adrp x9, .LCPI179_0 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI179_1] ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI179_0] ; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> ) ret <2 x i64> %tmp3 } define <2 x i64> @neon_sshl2d_no_fold(ptr %A) nounwind { ; CHECK-SD-LABEL: neon_sshl2d_no_fold: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: shl v0.2d, v0.2d, #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: neon_sshl2d_no_fold: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI180_0 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI180_0] ; CHECK-GI-NEXT: sshl v0.2d, v1.2d, v0.2d ; CHECK-GI-NEXT: ret %tmp2 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i16> @sshll2_8h(ptr %A) nounwind { ; CHECK-SD-LABEL: sshll2_8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0, #8] ; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sshll2_8h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr q0, [x0] ; CHECK-GI-NEXT: mov d0, v0.d[1] ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #1 ; CHECK-GI-NEXT: ret %load1 = load <16 x i8>, ptr %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> %tmp3 = shl <8 x i16> %tmp2, ret <8 x i16> %tmp3 } define <4 x i32> @sshll2_4s(ptr %A) nounwind { ; CHECK-SD-LABEL: sshll2_4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0, #8] ; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sshll2_4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr q0, [x0] ; CHECK-GI-NEXT: mov d0, v0.d[1] ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #1 ; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> %tmp3 = shl <4 x i32> %tmp2, ret <4 x i32> %tmp3 } define <2 x i64> @sshll2_2d(ptr %A) nounwind { ; CHECK-SD-LABEL: sshll2_2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0, #8] ; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sshll2_2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr q0, [x0] ; CHECK-GI-NEXT: mov d0, v0.d[1] ; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #1 ; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A 
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> %tmp3 = shl <2 x i64> %tmp2, ret <2 x i64> %tmp3 } define <8 x i8> @sqshli8b(ptr %A) nounwind { ; CHECK-SD-LABEL: sqshli8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: sqshl v0.8b, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sqshli8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.8b, #1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: sqshl v0.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @sqshli4h(ptr %A) nounwind { ; CHECK-SD-LABEL: sqshli4h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: sqshl v0.4h, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sqshli4h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.4h, #1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: sqshl v0.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @sqshli2s(ptr %A) nounwind { ; CHECK-SD-LABEL: sqshli2s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: sqshl v0.2s, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sqshli2s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2s, #1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: sqshl v0.2s, v1.2s, v0.2s ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @sqshli16b(ptr %A) nounwind { ; CHECK-SD-LABEL: sqshli16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: sqshl v0.16b, v0.16b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sqshli16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.16b, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: sqshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @sqshli8h(ptr %A) nounwind { ; CHECK-SD-LABEL: sqshli8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: sqshl v0.8h, v0.8h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sqshli8h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.8h, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: sqshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @sqshli4s(ptr %A) nounwind { ; CHECK-SD-LABEL: sqshli4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: sqshl v0.4s, v0.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sqshli4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.4s, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: sqshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @sqshli2d(ptr %A) nounwind { ; CHECK-SD-LABEL: sqshli2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: sqshl v0.2d, v0.2d, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sqshli2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI190_0 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI190_0] ; CHECK-GI-NEXT: sqshl v0.2d, 
v1.2d, v0.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i8> @uqshli8b(ptr %A) nounwind { ; CHECK-SD-LABEL: uqshli8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: uqshl v0.8b, v0.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uqshli8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.8b, #1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: uqshl v0.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <8 x i8> @uqshli8b_1(ptr %A) nounwind { ; CHECK-LABEL: uqshli8b_1: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.8b, #8 ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: uqshl v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @uqshli4h(ptr %A) nounwind { ; CHECK-SD-LABEL: uqshli4h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: uqshl v0.4h, v0.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uqshli4h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.4h, #1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: uqshl v0.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @uqshli2s(ptr %A) nounwind { ; CHECK-SD-LABEL: uqshli2s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d0, [x0] ; CHECK-SD-NEXT: uqshl v0.2s, v0.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uqshli2s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2s, #1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: uqshl v0.2s, v1.2s, v0.2s ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @uqshli16b(ptr %A) nounwind { ; CHECK-SD-LABEL: uqshli16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: uqshl v0.16b, v0.16b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uqshli16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.16b, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: uqshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @uqshli8h(ptr %A) nounwind { ; CHECK-SD-LABEL: uqshli8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: uqshl v0.8h, v0.8h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uqshli8h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.8h, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: uqshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @uqshli4s(ptr %A) nounwind { ; CHECK-SD-LABEL: uqshli4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: uqshl v0.4s, v0.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uqshli4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.4s, #1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: uqshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @uqshli2d(ptr %A) 
nounwind { ; CHECK-SD-LABEL: uqshli2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q0, [x0] ; CHECK-SD-NEXT: uqshl v0.2d, v0.2d, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uqshli2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI198_0 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI198_0] ; CHECK-GI-NEXT: uqshl v0.2d, v1.2d, v0.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: ursra8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: ldr d0, [x1] ; CHECK-SD-NEXT: ursra v0.8b, v1.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ursra8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ldr d1, [x1] ; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>, ptr %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 } define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: ursra4h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: ldr d0, [x1] ; CHECK-SD-NEXT: ursra v0.4h, v1.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ursra4h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ldr d1, [x1] ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>, ptr %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 } define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: ursra2s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: ldr d0, [x1] ; CHECK-SD-NEXT: ursra v0.2s, v1.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ursra2s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s ; CHECK-GI-NEXT: ldr d1, [x1] ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>, ptr %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 } define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: ursra16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: ursra v0.16b, v1.16b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ursra16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>, ptr %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 } define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: ursra8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: ursra v0.8h, v1.8h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ursra8h: ; CHECK-GI: // %bb.0: ; 
CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>, ptr %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 } define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: ursra4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: ursra v0.4s, v1.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ursra4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>, ptr %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: ursra2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: ursra v0.2d, v1.2d, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: ursra2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>, ptr %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ursra1d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: ursra d0, d1, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) %tmp4 = load <1 x i64>, ptr %B %tmp5 = add <1 x i64> %tmp3, %tmp4 ret <1 x i64> %tmp5 } define i64 @ursra_scalar(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ursra_scalar: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: ursra d1, d0, #1 ; CHECK-NEXT: fmov x0, d1 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1) %tmp4 = load i64, ptr %B %tmp5 = add i64 %tmp3, %tmp4 ret i64 %tmp5 } define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: srsra8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: ldr d0, [x1] ; CHECK-SD-NEXT: srsra v0.8b, v1.8b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srsra8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ldr d1, [x1] ; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>, ptr %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 } define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: srsra4h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: ldr d0, [x1] ; CHECK-SD-NEXT: srsra v0.4h, v1.4h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srsra4h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: 
movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ldr d1, [x1] ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>, ptr %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 } define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: srsra2s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: ldr d0, [x1] ; CHECK-SD-NEXT: srsra v0.2s, v1.2s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srsra2s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s ; CHECK-GI-NEXT: ldr d1, [x1] ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>, ptr %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 } define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: srsra16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: srsra v0.16b, v1.16b, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srsra16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>, ptr %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 } define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: srsra8h: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: srsra v0.8h, v1.8h, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srsra8h: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>, ptr %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 } define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: srsra4s: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: srsra v0.4s, v1.4s, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srsra4s: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>, ptr %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: srsra2d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: ldr q0, [x1] ; CHECK-SD-NEXT: srsra v0.2d, v1.2d, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: srsra2d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: srshl v0.2d, v1.2d, 
v0.2d ; CHECK-GI-NEXT: ldr q1, [x1] ; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>, ptr %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: srsra1d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: srsra d0, d1, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) %tmp4 = load <1 x i64>, ptr %B %tmp5 = add <1 x i64> %tmp3, %tmp4 ret <1 x i64> %tmp5 } define i64 @srsra_scalar(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: srsra_scalar: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: srsra d1, d0, #1 ; CHECK-NEXT: fmov x0, d1 ; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1) %tmp4 = load i64, ptr %B %tmp5 = add i64 %tmp3, %tmp4 ret i64 %tmp5 } define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: usra8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: usra v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = lshr <8 x i8> %tmp1, %tmp4 = load <8 x i8>, ptr %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 } define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: usra4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: usra v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = lshr <4 x i16> %tmp1, %tmp4 = load <4 x i16>, ptr %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 } define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: usra2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: usra v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = lshr <2 x i32> %tmp1, %tmp4 = load <2 x i32>, ptr %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 } define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: usra16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: usra v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = lshr <16 x i8> %tmp1, %tmp4 = load <16 x i8>, ptr %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 } define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: usra8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: usra v0.8h, v1.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = lshr <8 x i16> %tmp1, %tmp4 = load <8 x i16>, ptr %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 } define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: usra4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: usra v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = lshr <4 x i32> %tmp1, %tmp4 = load <4 x i32>, ptr %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: usra2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: usra v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = lshr <2 x i64> %tmp1, %tmp4 = load <2 x i64>, ptr %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } define <1 
x i64> @usra1d(ptr %A, ptr %B) nounwind { ; CHECK-SD-LABEL: usra1d: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: ldr d0, [x1] ; CHECK-SD-NEXT: usra d0, d1, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: usra1d: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr x8, [x0] ; CHECK-GI-NEXT: ldr x9, [x1] ; CHECK-GI-NEXT: add x8, x9, x8, lsr #1 ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = lshr <1 x i64> %tmp1, %tmp4 = load <1 x i64>, ptr %B %tmp5 = add <1 x i64> %tmp3, %tmp4 ret <1 x i64> %tmp5 } define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ssra8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: ssra v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = ashr <8 x i8> %tmp1, %tmp4 = load <8 x i8>, ptr %B %tmp5 = add <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 } define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ssra4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: ssra v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = ashr <4 x i16> %tmp1, %tmp4 = load <4 x i16>, ptr %B %tmp5 = add <4 x i16> %tmp3, %tmp4 ret <4 x i16> %tmp5 } define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ssra2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: ssra v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = ashr <2 x i32> %tmp1, %tmp4 = load <2 x i32>, ptr %B %tmp5 = add <2 x i32> %tmp3, %tmp4 ret <2 x i32> %tmp5 } define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ssra16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ssra v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = ashr <16 x i8> %tmp1, %tmp4 = load <16 x i8>, ptr %B %tmp5 = add <16 x i8> %tmp3, %tmp4 ret <16 x i8> %tmp5 } define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ssra8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ssra v0.8h, v1.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = ashr <8 x i16> %tmp1, %tmp4 = load <8 x i16>, ptr %B %tmp5 = add <8 x i16> %tmp3, %tmp4 ret <8 x i16> %tmp5 } define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ssra4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ssra v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = ashr <4 x i32> %tmp1, %tmp4 = load <4 x i32>, ptr %B %tmp5 = add <4 x i32> %tmp3, %tmp4 ret <4 x i32> %tmp5 } define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: ssra2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ssra v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = ashr <2 x i64> %tmp1, %tmp4 = load <2 x i64>, ptr %B %tmp5 = add <2 x i64> %tmp3, %tmp4 ret <2 x i64> %tmp5 } define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: shr_orr8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: ushr v0.8b, v0.8b, #1 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp4 = load <8 x i8>, ptr %B %tmp3 = lshr <8 x i8> %tmp1, %tmp5 = or <8 x i8> %tmp3, %tmp4 ret <8 x i8> %tmp5 } define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: shr_orr4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] ; 
define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ushr v0.8b, v0.8b, #1
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp4 = load <8 x i8>, ptr %B
  %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ushr v0.4h, v0.4h, #1
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp4 = load <4 x i16>, ptr %B
  %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ushr v0.2s, v0.2s, #1
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp4 = load <2 x i32>, ptr %B
  %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp5 = or <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ushr v0.16b, v0.16b, #1
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp4 = load <16 x i8>, ptr %B
  %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp4 = load <8 x i16>, ptr %B
  %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ushr v0.4s, v0.4s, #1
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp4 = load <4 x i32>, ptr %B
  %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp5 = or <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ushr v0.2d, v0.2d, #1
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp4 = load <2 x i64>, ptr %B
  %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp5 = or <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}
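; In the shl_orr tests, the left shift by 1 is selected by SDAG as an add of
; the value to itself, while GlobalISel emits an explicit shl by #1; both are
; then followed by the orr.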
define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr d0, [x0]
; CHECK-SD-NEXT:    ldr d1, [x1]
; CHECK-SD-NEXT:    add v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_orr8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr d0, [x0]
; CHECK-GI-NEXT:    ldr d1, [x1]
; CHECK-GI-NEXT:    shl v0.8b, v0.8b, #1
; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp4 = load <8 x i8>, ptr %B
  %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr4h:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr d0, [x0]
; CHECK-SD-NEXT:    ldr d1, [x1]
; CHECK-SD-NEXT:    add v0.4h, v0.4h, v0.4h
; CHECK-SD-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_orr4h:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr d0, [x0]
; CHECK-GI-NEXT:    ldr d1, [x1]
; CHECK-GI-NEXT:    shl v0.4h, v0.4h, #1
; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp4 = load <4 x i16>, ptr %B
  %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr2s:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr d0, [x0]
; CHECK-SD-NEXT:    ldr d1, [x1]
; CHECK-SD-NEXT:    add v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_orr2s:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr d0, [x0]
; CHECK-GI-NEXT:    ldr d1, [x1]
; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #1
; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp4 = load <2 x i32>, ptr %B
  %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp5 = or <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr q0, [x0]
; CHECK-SD-NEXT:    ldr q1, [x1]
; CHECK-SD-NEXT:    add v0.16b, v0.16b, v0.16b
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_orr16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr q0, [x0]
; CHECK-GI-NEXT:    ldr q1, [x1]
; CHECK-GI-NEXT:    shl v0.16b, v0.16b, #1
; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp4 = load <16 x i8>, ptr %B
  %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr8h:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr q0, [x0]
; CHECK-SD-NEXT:    ldr q1, [x1]
; CHECK-SD-NEXT:    add v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_orr8h:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr q0, [x0]
; CHECK-GI-NEXT:    ldr q1, [x1]
; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #1
; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp4 = load <8 x i16>, ptr %B
  %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr4s:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr q0, [x0]
; CHECK-SD-NEXT:    ldr q1, [x1]
; CHECK-SD-NEXT:    add v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_orr4s:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr q0, [x0]
; CHECK-GI-NEXT:    ldr q1, [x1]
; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #1
; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp4 = load <4 x i32>, ptr %B
  %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp5 = or <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr2d:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr q0, [x0]
; CHECK-SD-NEXT:    ldr q1, [x1]
; CHECK-SD-NEXT:    add v0.2d, v0.2d, v0.2d
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_orr2d:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr q0, [x0]
; CHECK-GI-NEXT:    ldr q1, [x1]
; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #1
; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp4 = load <2 x i64>, ptr %B
  %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp5 = or <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}
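; shll/shll_high: a zero-extend followed by a left shift by the source
; element width can be selected as SHLL/SHLL2 (SDAG); GlobalISel currently
; emits a separate ushll/ushll2 plus shl, as the checks show.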
define <8 x i16> @shll(<8 x i8> %in) {
; CHECK-SD-LABEL: shll:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    shll v0.8h, v0.8b, #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shll:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #8
; CHECK-GI-NEXT:    ret
  %ext = zext <8 x i8> %in to <8 x i16>
  %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %res
}

define <4 x i32> @shll_high(<8 x i16> %in) {
; CHECK-SD-LABEL: shll_high:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    shll2 v0.4s, v0.8h, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shll_high:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #16
; CHECK-GI-NEXT:    ret
  %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %ext = zext <4 x i16> %extract to <4 x i32>
  %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %res
}

define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sli v0.8b, v1.8b, #1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sli v0.4h, v1.4h, #1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sli v0.2s, v1.2s, #1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
  ret <2 x i32> %tmp3
}

define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sli d0, d1, #1
; CHECK-NEXT:    ret
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
  ret <1 x i64> %tmp3
}

; Ensure we can select scalar SLI with a zero shift (see issue #139879).
define <1 x i64> @sli1d_imm0(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: sli1d_imm0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli d0, d1, #0
; CHECK-NEXT:    ret
  %r = call <1 x i64> @llvm.aarch64.neon.vsli(<1 x i64> %a, <1 x i64> %b, i32 0)
  ret <1 x i64> %r
}

define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sli v0.16b, v1.16b, #1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sli v0.8h, v1.8h, #1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sli v0.4s, v1.4s, #1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sli v0.2d, v1.2d, #1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
  ret <2 x i64> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone

define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-SD-LABEL: ashr_v1i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    neg d1, d1
; CHECK-SD-NEXT:    sshl d0, d0, d1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ashr_v1i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    asr x8, x8, x9
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    ret
  %c = ashr <1 x i64> %a, %b
  ret <1 x i64> %c
}
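; The *_zero_shift_amount tests check handling of a shift by an all-zero
; vector: per the checks, SDAG folds the sqshl/uqshl/srshl/urshl/sshl/ushl
; away (only the addp remains) but keeps an explicit sqshlu #0, while
; GlobalISel still materializes the zero vector and emits the shift.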
define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: sqshl_zero_shift_amount:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    str q0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sqshl_zero_shift_amount:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
; CHECK-GI-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    sqshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    str q0, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: uqshl_zero_shift_amount:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    str q0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uqshl_zero_shift_amount:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
; CHECK-GI-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    uqshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    str q0, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: srshl_zero_shift_amount:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    str q0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: srshl_zero_shift_amount:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
; CHECK-GI-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    srshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    str q0, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: urshl_zero_shift_amount:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    str q0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: urshl_zero_shift_amount:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
; CHECK-GI-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    urshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    str q0, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-LABEL: sqshlu_zero_shift_amount:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    sqshlu v0.2d, v0.2d, #0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: sshl_zero_shift_amount:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    str q0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sshl_zero_shift_amount:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
; CHECK-GI-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    sshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    str q0, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: ushl_zero_shift_amount:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    str q0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ushl_zero_shift_amount:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
; CHECK-GI-NEXT:    addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    ushl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    str q0, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: sext_rshrn:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rshrn v0.4h, v0.4s, #13
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
entry:
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32>
  ret <4 x i32> %vmovl.i
}

define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: zext_rshrn:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rshrn v0.4h, v0.4s, #13
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
entry:
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32>
  ret <4 x i32> %vmovl.i
}

define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: mul_rshrn:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v1.4s, #3
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    rshrn v0.4h, v0.4s, #13
; CHECK-NEXT:    ret
entry:
  %b = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13)
  ret <4 x i16> %vrshrn_n1
}

define <8 x i16> @signbits_vashr(<8 x i16> %a) {
; CHECK-SD-LABEL: signbits_vashr:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sshr v0.8h, v0.8h, #8
; CHECK-SD-NEXT:    sshr v0.8h, v0.8h, #9
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: signbits_vashr:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mvni v1.8h, #7
; CHECK-GI-NEXT:    mvni v2.8h, #8
; CHECK-GI-NEXT:    sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    sshl v0.8h, v0.8h, v2.8h
; CHECK-GI-NEXT:    sshr v0.8h, v0.8h, #7
; CHECK-GI-NEXT:    ret
  %b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
  %c = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> <i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9>)
  %d = ashr <8 x i16> %c, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %d
}

define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-LABEL: lshr_trunc_v2i64_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shrn v0.2s, v0.2d, #16
; CHECK-NEXT:    ret
  %b = lshr <2 x i64> %a, <i64 16, i64 16>
  %c = trunc <2 x i64> %b to <2 x i8>
  ret <2 x i8> %c
}

define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-LABEL: ashr_trunc_v2i64_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shrn v0.2s, v0.2d, #16
; CHECK-NEXT:    ret
  %b = ashr <2 x i64> %a, <i64 16, i64 16>
  %c = trunc <2 x i64> %b to <2 x i8>
  ret <2 x i8> %c
}
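; For shl followed by trunc, the checks show SDAG narrowing first (xtn) and
; shifting the narrower vector, whereas GlobalISel shifts in the wider type
; and then narrows.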
define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-SD-LABEL: shl_trunc_v2i64_v2i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shl_trunc_v2i64_v2i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #16
; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
; CHECK-GI-NEXT:    ret
  %b = shl <2 x i64> %a, <i64 16, i64 16>
  %c = trunc <2 x i64> %b to <2 x i8>
  ret <2 x i8> %c
}

declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)