; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind { ; CHECK-LABEL: tbl1_8b: ; CHECK: // %bb.0: ; CHECK-NEXT: tbl.8b v0, { v0 }, v1 ; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) ret <8 x i8> %tmp3 } define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { ; CHECK-LABEL: tbl1_16b: ; CHECK: // %bb.0: ; CHECK-NEXT: tbl.16b v0, { v0 }, v1 ; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) ret <16 x i8> %tmp3 } define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { ; CHECK-SD-LABEL: tbl2_8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbl2_8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) ret <8 x i8> %tmp3 } define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { ; CHECK-SD-LABEL: tbl2_16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbl2_16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %tmp3 } define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { ; CHECK-SD-LABEL: tbl3_8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbl3_8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 ; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { ; CHECK-SD-LABEL: tbl3_16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbl3_16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 ; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { ; CHECK-SD-LABEL: tbl4_8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbl4_8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 ; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { ; CHECK-SD-LABEL: tbl4_16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbl4_16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } ; CHECK-SD-LABEL: .LCPI8_0: ; CHECK-SD: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 8 // 0x8 ; CHECK-SD-NEXT: .byte 12 // 0xc ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-GI-LABEL: .LCPI8_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 2 // 0x2 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 13 // 0xd ; CHECK-GI-NEXT: .byte 14 // 0xe ; CHECK-GI-NEXT: .byte 15 // 0xf ; CHECK-GI-LABEL: .LCPI8_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI8_0 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4 ; CHECK-SD-NEXT: mov.s v0[1], v1[1] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI8_1 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI8_0 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4 ; CHECK-GI-NEXT: mov.d v0[1], v1[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> ) %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> ret <8 x i8> %s } ; CHECK-SD-LABEL: .LCPI9_0: ; CHECK-SD-NEXT: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 8 // 0x8 ; CHECK-SD-NEXT: .byte 12 // 0xc ; CHECK-SD-NEXT: .byte 16 // 0x10 ; CHECK-SD-NEXT: .byte 20 // 0x14 ; CHECK-SD-NEXT: .byte 24 // 0x18 ; CHECK-SD-NEXT: .byte 28 // 0x1c ; CHECK-SD-NEXT: .byte 32 // 0x20 ; CHECK-SD-NEXT: .byte 36 // 0x24 ; CHECK-SD-NEXT: .byte 40 // 0x28 ; CHECK-SD-NEXT: .byte 44 // 0x2c ; CHECK-SD-NEXT: .byte 48 // 0x30 ; CHECK-SD-NEXT: .byte 52 // 0x34 ; CHECK-SD-NEXT: .byte 56 // 0x38 ; CHECK-SD-NEXT: .byte 60 // 0x3c ;CHECK-GI-LABEL: .LCPI9_0: ;CHECK-GI: .byte 0 // 0x0 ;CHECK-GI-NEXT: .byte 1 // 0x1 ;CHECK-GI-NEXT: .byte 2 // 0x2 ;CHECK-GI-NEXT: .byte 3 // 0x3 ;CHECK-GI-NEXT: .byte 4 // 0x4 ;CHECK-GI-NEXT: .byte 5 // 0x5 ;CHECK-GI-NEXT: .byte 6 // 0x6 ;CHECK-GI-NEXT: .byte 7 // 0x7 ;CHECK-GI-NEXT: .byte 16 // 0x10 ;CHECK-GI-NEXT: .byte 17 // 0x11 ;CHECK-GI-NEXT: .byte 18 // 0x12 ;CHECK-GI-NEXT: .byte 19 // 0x13 ;CHECK-GI-NEXT: .byte 20 // 0x14 ;CHECK-GI-NEXT: .byte 21 // 0x15 ;CHECK-GI-NEXT: .byte 22 // 0x16 ;CHECK-GI-NEXT: .byte 23 // 0x17 ;CHECK-GI-LABEL: .LCPI9_1: ;CHECK-GI: .byte 0 // 0x0 ;CHECK-GI-NEXT: .byte 4 // 0x4 ;CHECK-GI-NEXT: .byte 8 // 0x8 ;CHECK-GI-NEXT: .byte 12 // 0xc ;CHECK-GI-NEXT: .byte 16 // 0x10 ;CHECK-GI-NEXT: .byte 20 // 0x14 ;CHECK-GI-NEXT: .byte 24 // 0x18 ;CHECK-GI-NEXT: .byte 28 // 0x1c ;CHECK-GI-NEXT: .byte 255 // 0xff ;CHECK-GI-NEXT: .byte 255 // 0xff ;CHECK-GI-NEXT: .byte 255 // 0xff ;CHECK-GI-NEXT: .byte 255 // 0xff ;CHECK-GI-NEXT: .byte 255 // 0xff ;CHECK-GI-NEXT: .byte 255 // 0xff ;CHECK-GI-NEXT: .byte 255 // 0xff ;CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI9_0 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI9_1 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } ; CHECK-GI-LABEL: .LCPI10_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 2 // 0x2 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 5 // 0x5 ; CHECK-GI-NEXT: .byte 6 // 0x6 ; CHECK-GI-NEXT: .byte 7 // 0x7 ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 17 // 0x11 ; CHECK-GI-NEXT: .byte 18 // 0x12 ; CHECK-GI-NEXT: .byte 19 // 0x13 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 22 // 0x16 ; CHECK-GI-NEXT: .byte 23 // 0x17 ; CHECK-GI-LABEL: .LCPI10_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s4, w0 ; CHECK-SD-NEXT: mov w8, #32 // =0x20 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w0 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[2], w0 ; CHECK-SD-NEXT: mov.b v4[3], w0 ; CHECK-SD-NEXT: mov.b v4[4], w0 ; CHECK-SD-NEXT: mov.b v4[5], w0 ; CHECK-SD-NEXT: mov.b v4[6], w0 ; CHECK-SD-NEXT: mov.b v4[7], w0 ; CHECK-SD-NEXT: mov.b v4[8], w8 ; CHECK-SD-NEXT: mov w8, #36 // =0x24 ; CHECK-SD-NEXT: mov.b v4[9], w8 ; CHECK-SD-NEXT: mov w8, #40 // =0x28 ; CHECK-SD-NEXT: mov.b v4[10], w8 ; CHECK-SD-NEXT: mov w8, #44 // =0x2c ; CHECK-SD-NEXT: mov.b v4[11], w8 ; CHECK-SD-NEXT: mov w8, #48 // =0x30 ; CHECK-SD-NEXT: mov.b v4[12], w8 ; CHECK-SD-NEXT: mov w8, #52 // =0x34 ; CHECK-SD-NEXT: mov.b v4[13], w8 ; CHECK-SD-NEXT: mov w8, #56 // =0x38 ; CHECK-SD-NEXT: mov.b v4[14], w8 ; CHECK-SD-NEXT: mov w8, #60 // =0x3c ; CHECK-SD-NEXT: mov.b v4[15], w8 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.b v4[1], w0 ; CHECK-GI-NEXT: mov.b v4[2], w0 ; CHECK-GI-NEXT: mov.b v4[3], w0 ; CHECK-GI-NEXT: mov.b v4[4], w0 ; CHECK-GI-NEXT: mov.b v4[5], w0 ; CHECK-GI-NEXT: mov.b v4[6], w0 ; CHECK-GI-NEXT: mov.b v4[7], w0 ; CHECK-GI-NEXT: mov.b v4[8], w8 ; CHECK-GI-NEXT: mov.b v4[9], w8 ; CHECK-GI-NEXT: mov.b v4[10], w8 ; CHECK-GI-NEXT: mov.b v4[11], w8 ; CHECK-GI-NEXT: mov.b v4[12], w8 ; CHECK-GI-NEXT: mov.b v4[13], w8 ; CHECK-GI-NEXT: mov.b v4[14], w8 ; CHECK-GI-NEXT: mov.b v4[15], w8 ; CHECK-GI-NEXT: adrp x8, .LCPI10_1 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI10_1] ; CHECK-GI-NEXT: adrp x8, .LCPI10_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI10_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } ; CHECK-GI-LABEL: .LCPI11_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 2 // 0x2 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 5 // 0x5 ; CHECK-GI-NEXT: .byte 6 // 0x6 ; CHECK-GI-NEXT: .byte 15 // 0xf ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 17 // 0x11 ; CHECK-GI-NEXT: .byte 18 // 0x12 ; CHECK-GI-NEXT: .byte 19 // 0x13 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 22 // 0x16 ; CHECK-GI-NEXT: .byte 31 // 0x1f ; CHECK-GI-LABEL: .LCPI11_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #1 // =0x1 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: fmov s4, w8 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w8 ; CHECK-SD-NEXT: mov.b v4[2], w8 ; CHECK-SD-NEXT: mov.b v4[3], w8 ; CHECK-SD-NEXT: mov.b v4[4], w8 ; CHECK-SD-NEXT: mov.b v4[5], w8 ; CHECK-SD-NEXT: mov.b v4[6], w8 ; CHECK-SD-NEXT: mov w8, #32 // =0x20 ; CHECK-SD-NEXT: mov.b v4[7], w0 ; CHECK-SD-NEXT: mov.b v4[8], w8 ; CHECK-SD-NEXT: mov w8, #36 // =0x24 ; CHECK-SD-NEXT: mov.b v4[9], w8 ; CHECK-SD-NEXT: mov w8, #40 // =0x28 ; CHECK-SD-NEXT: mov.b v4[10], w8 ; CHECK-SD-NEXT: mov w8, #44 // =0x2c ; CHECK-SD-NEXT: mov.b v4[11], w8 ; CHECK-SD-NEXT: mov w8, #48 // =0x30 ; CHECK-SD-NEXT: mov.b v4[12], w8 ; CHECK-SD-NEXT: mov w8, #52 // =0x34 ; CHECK-SD-NEXT: mov.b v4[13], w8 ; CHECK-SD-NEXT: mov w8, #56 // =0x38 ; CHECK-SD-NEXT: mov.b v4[14], w8 ; CHECK-SD-NEXT: mov w8, #31 // =0x1f ; CHECK-SD-NEXT: mov.b v4[15], w8 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.b v4[1], w8 ; CHECK-GI-NEXT: mov.b v4[2], w8 ; CHECK-GI-NEXT: mov.b v4[3], w8 ; CHECK-GI-NEXT: mov.b v4[4], w8 ; CHECK-GI-NEXT: mov.b v4[5], w8 ; CHECK-GI-NEXT: mov.b v4[6], w8 ; CHECK-GI-NEXT: mov.b v4[7], w8 ; CHECK-GI-NEXT: mov w8, #255 // =0xff ; CHECK-GI-NEXT: mov.b v4[8], w8 ; CHECK-GI-NEXT: mov.b v4[9], w8 ; CHECK-GI-NEXT: mov.b v4[10], w8 ; CHECK-GI-NEXT: mov.b v4[11], w8 ; CHECK-GI-NEXT: mov.b v4[12], w0 ; CHECK-GI-NEXT: mov.b v4[13], w0 ; CHECK-GI-NEXT: mov.b v4[14], w8 ; CHECK-GI-NEXT: adrp x8, .LCPI11_1 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI11_1] ; CHECK-GI-NEXT: adrp x8, .LCPI11_0 ; CHECK-GI-NEXT: mov.b v4[15], w0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2 %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3 %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4 %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5 %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6 %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12 %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } ; CHECK-SD-LABEL: .LCPI12_0: ; CHECK-SD: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 8 // 0x8 ; CHECK-SD-NEXT: .byte 12 // 0xc ; CHECK-SD-NEXT: .byte 16 // 0x10 ; CHECK-SD-NEXT: .byte 20 // 0x14 ; CHECK-SD-NEXT: .byte 24 // 0x18 ; CHECK-SD-NEXT: .byte 28 // 0x1c ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-GI-LABEL: .LCPI12_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 2 // 0x2 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 5 // 0x5 ; CHECK-GI-NEXT: .byte 6 // 0x6 ; CHECK-GI-NEXT: .byte 7 // 0x7 ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 17 // 0x11 ; CHECK-GI-NEXT: .byte 18 // 0x12 ; CHECK-GI-NEXT: .byte 19 // 0x13 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 22 // 0x16 ; CHECK-GI-NEXT: .byte 23 // 0x17 ; CHECK-GI-LABEL: .LCPI12_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff ; CHECK-SD-NEXT: adrp x8, .LCPI12_0 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-SD-NEXT: mov.b v4[0], w0 ; CHECK-SD-NEXT: mov.b v4[1], w0 ; CHECK-SD-NEXT: mov.b v4[2], w0 ; CHECK-SD-NEXT: mov.b v4[3], w0 ; CHECK-SD-NEXT: mov.b v4[4], w0 ; CHECK-SD-NEXT: mov.b v4[5], w0 ; CHECK-SD-NEXT: mov.b v4[6], w0 ; CHECK-SD-NEXT: mov.b v4[7], w0 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-SD-NEXT: mov.d v2[1], v0[0] ; CHECK-SD-NEXT: mov.16b v0, v2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff ; CHECK-GI-NEXT: adrp x9, .LCPI12_1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x9, :lo12:.LCPI12_1] ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.b v4[1], w0 ; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-GI-NEXT: mov.b v4[2], w0 ; CHECK-GI-NEXT: mov.b v4[3], w0 ; CHECK-GI-NEXT: mov.b v4[4], w0 ; CHECK-GI-NEXT: mov.b v4[5], w0 ; CHECK-GI-NEXT: mov.b v4[6], w0 ; CHECK-GI-NEXT: mov.b v4[7], w0 ; CHECK-GI-NEXT: mov.b v4[8], w8 ; CHECK-GI-NEXT: mov.b v4[9], w8 ; CHECK-GI-NEXT: mov.b v4[10], w8 ; CHECK-GI-NEXT: mov.b v4[11], w8 ; CHECK-GI-NEXT: mov.b v4[12], w8 ; CHECK-GI-NEXT: mov.b v4[13], w8 ; CHECK-GI-NEXT: mov.b v4[14], w8 ; CHECK-GI-NEXT: mov.b v4[15], w8 ; CHECK-GI-NEXT: adrp x8, .LCPI12_0 ; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v4 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] ; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0 ; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } ; CHECK-SD-LABEL: .LCPI13_0: ; CHECK-SD: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 8 // 0x8 ; CHECK-SD-NEXT: .byte 12 // 0xc ; CHECK-SD-NEXT: .byte 16 // 0x10 ; CHECK-SD-NEXT: .byte 20 // 0x14 ; CHECK-SD-NEXT: .byte 24 // 0x18 ; CHECK-SD-NEXT: .byte 28 // 0x1c ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-NEXT: .byte 255 // 0xff ; CHECK-SD-LABEL: .LCPI13_1: ; CHECK-SD: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 1 // 0x1 ; CHECK-SD-NEXT: .byte 2 // 0x2 ; CHECK-SD-NEXT: .byte 3 // 0x3 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 5 // 0x5 ; CHECK-SD-NEXT: .byte 6 // 0x6 ; CHECK-SD-NEXT: .byte 7 // 0x7 ; CHECK-SD-NEXT: .byte 16 // 0x10 ; CHECK-SD-NEXT: .byte 17 // 0x11 ; CHECK-SD-NEXT: .byte 18 // 0x12 ; CHECK-SD-NEXT: .byte 19 // 0x13 ; CHECK-SD-NEXT: .byte 20 // 0x14 ; CHECK-SD-NEXT: .byte 21 // 0x15 ; CHECK-SD-NEXT: .byte 30 // 0x1e ; CHECK-SD-NEXT: .byte 31 // 0x1f ; CHECK-GI-LABEL: .LCPI13_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 2 // 0x2 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 5 // 0x5 ; CHECK-GI-NEXT: .byte 6 // 0x6 ; CHECK-GI-NEXT: .byte 7 // 0x7 ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 17 // 0x11 ; CHECK-GI-NEXT: .byte 18 // 0x12 ; CHECK-GI-NEXT: .byte 19 // 0x13 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 30 // 0x1e ; CHECK-GI-NEXT: .byte 31 // 0x1f ; CHECK-GI-LABEL: .LCPI13_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: dup.16b v4, w0 ; CHECK-SD-NEXT: mov w8, #255 // =0xff ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: mov.b v4[8], w8 ; CHECK-SD-NEXT: mov.b v4[9], w8 ; CHECK-SD-NEXT: mov.b v4[10], w8 ; CHECK-SD-NEXT: mov.b v4[11], w8 ; CHECK-SD-NEXT: mov.b v4[12], w8 ; CHECK-SD-NEXT: mov.b v4[13], w8 ; CHECK-SD-NEXT: adrp x8, .LCPI13_0 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI13_0] ; CHECK-SD-NEXT: adrp x8, .LCPI13_1 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-SD-NEXT: tbl.16b v3, { v0, v1 }, v4 ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI13_1] ; CHECK-SD-NEXT: tbl.16b v0, { v2, v3 }, v0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.b v4[1], w0 ; CHECK-GI-NEXT: mov.b v4[2], w0 ; CHECK-GI-NEXT: mov.b v4[3], w0 ; CHECK-GI-NEXT: mov.b v4[4], w0 ; CHECK-GI-NEXT: mov.b v4[5], w0 ; CHECK-GI-NEXT: mov.b v4[6], w0 ; CHECK-GI-NEXT: mov.b v4[7], w0 ; CHECK-GI-NEXT: mov.b v4[8], w8 ; CHECK-GI-NEXT: mov.b v4[9], w8 ; CHECK-GI-NEXT: mov.b v4[10], w8 ; CHECK-GI-NEXT: mov.b v4[11], w8 ; CHECK-GI-NEXT: mov.b v4[12], w8 ; CHECK-GI-NEXT: mov.b v4[13], w8 ; CHECK-GI-NEXT: adrp x8, .LCPI13_1 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI13_1] ; CHECK-GI-NEXT: adrp x8, .LCPI13_0 ; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-GI-NEXT: mov.b v4[14], w0 ; CHECK-GI-NEXT: mov.b v4[15], w0 ; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v4 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] ; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0 ; CHECK-GI-NEXT: ret %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } ; CHECK-SD-LABEL: .LCPI14_0: ; CHECK-SD: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 52 // 0x34 ; CHECK-SD-NEXT: .byte 12 // 0xc ; CHECK-SD-NEXT: .byte 16 // 0x10 ; CHECK-SD-NEXT: .byte 20 // 0x14 ; CHECK-SD-NEXT: .byte 24 // 0x18 ; CHECK-SD-NEXT: .byte 28 // 0x1c ; CHECK-SD-NEXT: .byte 32 // 0x20 ; CHECK-SD-NEXT: .byte 36 // 0x24 ; CHECK-SD-NEXT: .byte 40 // 0x28 ; CHECK-SD-NEXT: .byte 44 // 0x2c ; CHECK-SD-NEXT: .byte 48 // 0x30 ; CHECK-SD-NEXT: .byte 52 // 0x34 ; CHECK-SD-NEXT: .byte 56 // 0x38 ; CHECK-SD-NEXT: .byte 60 // 0x3c ; CHECK-GI-LABEL: .LCPI14_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 5 // 0x5 ; CHECK-GI-NEXT: .byte 6 // 0x6 ; CHECK-GI-NEXT: .byte 7 // 0x7 ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 17 // 0x11 ; CHECK-GI-NEXT: .byte 18 // 0x12 ; CHECK-GI-NEXT: .byte 19 // 0x13 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 22 // 0x16 ; CHECK-GI-NEXT: .byte 23 // 0x17 ; CHECK-GI-LABEL: .LCPI14_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } ; CHECK-SD-LABEL: .LCPI15_0: ; CHECK-SD: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 52 // 0x34 ; CHECK-SD-NEXT: .byte 12 // 0xc ; CHECK-SD-NEXT: .byte 16 // 0x10 ; CHECK-SD-NEXT: .byte 20 // 0x14 ; CHECK-SD-NEXT: .byte 24 // 0x18 ; CHECK-SD-NEXT: .byte 28 // 0x1c ; CHECK-SD-NEXT: .byte 32 // 0x20 ; CHECK-SD-NEXT: .byte 36 // 0x24 ; CHECK-SD-NEXT: .byte 40 // 0x28 ; CHECK-SD-NEXT: .byte 44 // 0x2c ; CHECK-SD-NEXT: .byte 48 // 0x30 ; CHECK-SD-NEXT: .byte 52 // 0x34 ; CHECK-SD-NEXT: .byte 56 // 0x38 ; CHECK-SD-NEXT: .byte 60 // 0x3c ; CHECK-GI-LABEL: .LCPI15_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 5 // 0x5 ; CHECK-GI-NEXT: .byte 6 // 0x6 ; CHECK-GI-NEXT: .byte 7 // 0x7 ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 17 // 0x11 ; CHECK-GI-NEXT: .byte 18 // 0x12 ; CHECK-GI-NEXT: .byte 19 // 0x13 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 22 // 0x16 ; CHECK-GI-NEXT: .byte 23 // 0x17 ; CHECK-GI-LABEL: .LCPI15_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-LABEL: .LCPI15_2: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI15_0 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI15_2 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2] ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } ; CHECK-SD-LABEL: .LCPI16_0: ; CHECK-SD: .byte 0 // 0x0 ; CHECK-SD-NEXT: .byte 4 // 0x4 ; CHECK-SD-NEXT: .byte 52 // 0x34 ; CHECK-SD-NEXT: .byte 12 // 0xc ; CHECK-SD-NEXT: .byte 16 // 0x10 ; CHECK-SD-NEXT: .byte 20 // 0x14 ; CHECK-SD-NEXT: .byte 24 // 0x18 ; CHECK-SD-NEXT: .byte 28 // 0x1c ; CHECK-SD-NEXT: .byte 32 // 0x20 ; CHECK-SD-NEXT: .byte 36 // 0x24 ; CHECK-SD-NEXT: .byte 40 // 0x28 ; CHECK-SD-NEXT: .byte 44 // 0x2c ; CHECK-SD-NEXT: .byte 48 // 0x30 ; CHECK-SD-NEXT: .byte 52 // 0x34 ; CHECK-SD-NEXT: .byte 56 // 0x38 ; CHECK-SD-NEXT: .byte 60 // 0x3c ; CHECK-GI-LABEL: .LCPI16_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 3 // 0x3 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 5 // 0x5 ; CHECK-GI-NEXT: .byte 6 // 0x6 ; CHECK-GI-NEXT: .byte 7 // 0x7 ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 17 // 0x11 ; CHECK-GI-NEXT: .byte 18 // 0x12 ; CHECK-GI-NEXT: .byte 19 // 0x13 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 21 // 0x15 ; CHECK-GI-NEXT: .byte 22 // 0x16 ; CHECK-GI-NEXT: .byte 23 // 0x17 ; CHECK-GI-LABEL: .LCPI16_1: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-LABEL: .LCPI16_2: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 4 // 0x4 ; CHECK-GI-NEXT: .byte 8 // 0x8 ; CHECK-GI-NEXT: .byte 12 // 0xc ; CHECK-GI-NEXT: .byte 16 // 0x10 ; CHECK-GI-NEXT: .byte 20 // 0x14 ; CHECK-GI-NEXT: .byte 24 // 0x18 ; CHECK-GI-NEXT: .byte 28 // 0x1c ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff ; CHECK-GI-NEXT: .byte 255 // 0xff define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI16_2 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2] ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> ) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-LABEL: tbx1_8b: ; CHECK: // %bb.0: ; CHECK-NEXT: tbx.8b v0, { v1 }, v2 ; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) ret <8 x i8> %tmp3 } define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-LABEL: tbx1_16b: ; CHECK: // %bb.0: ; CHECK-NEXT: tbx.16b v0, { v1 }, v2 ; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %tmp3 } define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { ; CHECK-SD-LABEL: tbx2_8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbx2_8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3 ; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { ; CHECK-SD-LABEL: tbx2_16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbx2_16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3 ; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { ; CHECK-SD-LABEL: tbx3_8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbx3_8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 ; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { ; CHECK-SD-LABEL: tbx3_16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbx3_16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 ; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { ; CHECK-SD-LABEL: tbx4_8b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbx4_8b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 ; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) ret <8 x i8> %tmp3 } define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { ; CHECK-SD-LABEL: tbx4_16b: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: tbx4_16b: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 ; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 ; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) ret <16 x i8> %tmp3 } define <16 x i8> @pr135950(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M) { ; CHECK-SD-LABEL: pr135950: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov.16b v3, v1 ; CHECK-SD-NEXT: movi.2d v1, #0000000000000000 ; CHECK-SD-NEXT: mov.16b v4, v0 ; CHECK-SD-NEXT: mov.16b v5, v3 ; CHECK-SD-NEXT: tbl.16b v1, { v3, v4 }, v1 ; CHECK-SD-NEXT: tbl.16b v0, { v4, v5 }, v2 ; CHECK-SD-NEXT: zip1.16b v0, v0, v1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: pr135950: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov.16b v3, v2 ; CHECK-GI-NEXT: movi.2d v4, #0000000000000000 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v3 ; CHECK-GI-NEXT: mov.16b v2, v0 ; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v4 ; CHECK-GI-NEXT: zip1.16b v0, v3, v0 ; CHECK-GI-NEXT: ret %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M) %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %B, <16 x i8> %A, <16 x i8> zeroinitializer) %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> ret <16 x i8> %s } declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone