; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; These tests are taken from the combine-sdiv.ll in X86.
;
; Every function below is a single sdiv (two of them mask the dividend with an
; `and` first). The assertions record the AArch64 code emitted by the two llc
; invocations above: the -SD prefix belongs to the -global-isel=0 invocation,
; the -GI prefix to the -global-isel=1 invocation, and the common prefix is
; matched by both.
;
; NOTE(review): the constant vector operands of the sdiv/and instructions were
; re-entered by hand from the X86 test this file is derived from. Most are
; pinned by the assertions (lane masks, immediate shifts); the per-lane shift
; amounts live in constant pools and are not. Confirm them and regenerate the
; assertions with utils/update_llc_test_checks.py before committing.

; x / 1 needs no instructions.
define i32 @combine_sdiv_by_one(i32 %x) {
; CHECK-LABEL: combine_sdiv_by_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %1 = sdiv i32 %x, 1
  ret i32 %1
}

; Vector form of the divide-by-one case.
define <4 x i32> @combine_vec_sdiv_by_one(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_sdiv_by_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %1 = sdiv <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; x / -1 becomes a negate.
define i32 @combine_sdiv_by_negone(i32 %x) {
; CHECK-LABEL: combine_sdiv_by_negone:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w0, w0
; CHECK-NEXT:    ret
  %1 = sdiv i32 %x, -1
  ret i32 %1
}

; Vector form of the divide-by-minus-one case.
define <4 x i32> @combine_vec_sdiv_by_negone(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_sdiv_by_negone:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v0.4s, v0.4s
; CHECK-NEXT:    ret
  %1 = sdiv <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

; x / INT_MIN: compare-and-set with -global-isel=0, shift sequence with
; -global-isel=1.
define i32 @combine_sdiv_by_minsigned(i32 %x) {
; CHECK-SD-LABEL: combine_sdiv_by_minsigned:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-SD-NEXT:    cmp w0, w8
; CHECK-SD-NEXT:    cset w0, eq
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_sdiv_by_minsigned:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    asr w8, w0, #31
; CHECK-GI-NEXT:    add w8, w0, w8, lsr #1
; CHECK-GI-NEXT:    neg w0, w8, asr #31
; CHECK-GI-NEXT:    ret
  %1 = sdiv i32 %x, -2147483648
  ret i32 %1
}

; Vector form of the divide-by-INT_MIN case.
define <4 x i32> @combine_vec_sdiv_by_minsigned(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_minsigned:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v1.4s, #128, lsl #24
; CHECK-SD-NEXT:    movi v2.4s, #1
; CHECK-SD-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_minsigned:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sshr v1.4s, v0.4s, #31
; CHECK-GI-NEXT:    usra v0.4s, v1.4s, #1
; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #31
; CHECK-GI-NEXT:    neg v0.4s, v0.4s
; CHECK-GI-NEXT:    ret
  %1 = sdiv <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ret <4 x i32> %1
}

; 0 / x folds to zero.
define i32 @combine_sdiv_zero(i32 %x) {
; CHECK-LABEL: combine_sdiv_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %1 = sdiv i32 0, %x
  ret i32 %1
}

; Vector form of the zero-dividend case.
define <4 x i32> @combine_vec_sdiv_zero(<4 x i32> %x) {
; CHECK-LABEL: combine_vec_sdiv_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %1 = sdiv <4 x i32> zeroinitializer, %x
  ret <4 x i32> %1
}

; x / x: folded to 1 with -global-isel=0, a real sdiv with -global-isel=1.
define i32 @combine_sdiv_dupe(i32 %x) {
; CHECK-SD-LABEL: combine_sdiv_dupe:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w0, #1 // =0x1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_sdiv_dupe:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sdiv w0, w0, w0
; CHECK-GI-NEXT:    ret
  %1 = sdiv i32 %x, %x
  ret i32 %1
}

; Vector x / x: the -global-isel=1 output divides each lane separately.
define <4 x i32> @combine_vec_sdiv_dupe(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_dupe:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.4s, #1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_dupe:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov w9, v0.s[1]
; CHECK-GI-NEXT:    mov w10, v0.s[2]
; CHECK-GI-NEXT:    mov w11, v0.s[3]
; CHECK-GI-NEXT:    sdiv w8, w8, w8
; CHECK-GI-NEXT:    sdiv w9, w9, w9
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    sdiv w10, w10, w10
; CHECK-GI-NEXT:    mov v0.s[1], w9
; CHECK-GI-NEXT:    sdiv w8, w11, w11
; CHECK-GI-NEXT:    mov v0.s[2], w10
; CHECK-GI-NEXT:    mov v0.s[3], w8
; CHECK-GI-NEXT:    ret
  %1 = sdiv <4 x i32> %x, %x
  ret <4 x i32> %1
}

; Dividend masked to its low 8 bits, uniform divisor 4.
define <4 x i32> @combine_vec_sdiv_by_pos0(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pos0:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v1.2d, #0x0000ff000000ff
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ushr v0.4s, v0.4s, #2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pos0:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    movi v1.2d, #0x0000ff000000ff
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #2
; CHECK-GI-NEXT:    ret
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = sdiv <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %2
}

; Dividend masked to its low 8 bits, per-lane power-of-two divisors.
define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pos1:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v1.2d, #0x0000ff000000ff
; CHECK-SD-NEXT:    adrp x8, .LCPI11_0
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
; CHECK-SD-NEXT:    ushl v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pos1:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    mov w9, #0 // =0x0
; CHECK-GI-NEXT:    movi v2.2d, #0x0000ff000000ff
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NEXT:    mov v1.s[1], w9
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    neg v2.4s, v3.4s
; CHECK-GI-NEXT:    sshl v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    mov v1.s[2], w9
; CHECK-GI-NEXT:    mov v1.s[3], w9
; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
  %2 = sdiv <4 x i32> %1, <i32 1, i32 4, i32 8, i32 16>
  ret <4 x i32> %2
}

; Uniform divisor 4.
define <4 x i32> @combine_vec_sdiv_by_pow2a(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    cmlt v1.4s, v0.4s, #0
; CHECK-SD-NEXT:    usra v0.4s, v1.4s, #30
; CHECK-SD-NEXT:    sshr v0.4s, v0.4s, #2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sshr v1.4s, v0.4s, #31
; CHECK-GI-NEXT:    usra v0.4s, v1.4s, #30
; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #2
; CHECK-GI-NEXT:    ret
  %1 = sdiv <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %1
}

; Uniform divisor -4: same sequence as the divisor 4 case plus a final negate.
define <4 x i32> @combine_vec_sdiv_by_pow2a_neg(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a_neg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    cmlt v1.4s, v0.4s, #0
; CHECK-SD-NEXT:    usra v0.4s, v1.4s, #30
; CHECK-SD-NEXT:    sshr v0.4s, v0.4s, #2
; CHECK-SD-NEXT:    neg v0.4s, v0.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a_neg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sshr v1.4s, v0.4s, #31
; CHECK-GI-NEXT:    usra v0.4s, v1.4s, #30
; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #2
; CHECK-GI-NEXT:    neg v0.4s, v0.4s
; CHECK-GI-NEXT:    ret
  %1 = sdiv <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
  ret <4 x i32> %1
}

; Non-uniform power-of-two divisors, <16 x i8>.
define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
; CHECK-SD-NEXT:    cmlt v1.16b, v0.16b, #0
; CHECK-SD-NEXT:    movi v3.2d, #0x000000000000ff
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI14_1
; CHECK-SD-NEXT:    movi v4.2d, #0xffffffffffffff00
; CHECK-SD-NEXT:    ushl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI14_1]
; CHECK-SD-NEXT:    add v1.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v3.16b
; CHECK-SD-NEXT:    sshl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    and v1.16b, v1.16b, v4.16b
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI14_1
; CHECK-GI-NEXT:    sshr v2.16b, v0.16b, #7
; CHECK-GI-NEXT:    adrp x9, .LCPI14_0
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI14_2
; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI14_0]
; CHECK-GI-NEXT:    neg v1.16b, v1.16b
; CHECK-GI-NEXT:    shl v3.16b, v3.16b, #7
; CHECK-GI-NEXT:    ushl v1.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI14_2]
; CHECK-GI-NEXT:    neg v2.16b, v2.16b
; CHECK-GI-NEXT:    add v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    sshl v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    sshr v2.16b, v3.16b, #7
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <16 x i8> %x, <i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2, i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2>
  ret <16 x i8> %1
}

; Non-uniform power-of-two divisors, <8 x i16>.
define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI15_1
; CHECK-SD-NEXT:    cmlt v1.8h, v0.8h, #0
; CHECK-SD-NEXT:    adrp x9, .LCPI15_3
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI15_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI15_2
; CHECK-SD-NEXT:    ldr q3, [x9, :lo12:.LCPI15_3]
; CHECK-SD-NEXT:    ushl v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI15_2]
; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
; CHECK-SD-NEXT:    add v1.8h, v0.8h, v1.8h
; CHECK-SD-NEXT:    sshl v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    and v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI15_1
; CHECK-GI-NEXT:    sshr v2.8h, v0.8h, #15
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
; CHECK-GI-NEXT:    ldr d3, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT:    adrp x8, .LCPI15_2
; CHECK-GI-NEXT:    neg v1.8h, v1.8h
; CHECK-GI-NEXT:    ushl v1.8h, v2.8h, v1.8h
; CHECK-GI-NEXT:    ushll v2.8h, v3.8b, #0
; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI15_2]
; CHECK-GI-NEXT:    neg v3.8h, v3.8h
; CHECK-GI-NEXT:    add v1.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    shl v2.8h, v2.8h, #15
; CHECK-GI-NEXT:    sshl v1.8h, v1.8h, v3.8h
; CHECK-GI-NEXT:    sshr v2.8h, v2.8h, #15
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <8 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
  ret <8 x i16> %1
}

; Non-uniform power-of-two divisors, <16 x i16> (two 128-bit registers).
define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI16_1
; CHECK-SD-NEXT:    cmlt v2.8h, v0.8h, #0
; CHECK-SD-NEXT:    cmlt v3.8h, v1.8h, #0
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI16_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI16_2
; CHECK-SD-NEXT:    ushl v2.8h, v2.8h, v4.8h
; CHECK-SD-NEXT:    ushl v3.8h, v3.8h, v4.8h
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI16_2]
; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
; CHECK-SD-NEXT:    ldr q5, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI16_3
; CHECK-SD-NEXT:    add v2.8h, v0.8h, v2.8h
; CHECK-SD-NEXT:    add v3.8h, v1.8h, v3.8h
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v5.16b
; CHECK-SD-NEXT:    and v1.16b, v1.16b, v5.16b
; CHECK-SD-NEXT:    sshl v2.8h, v2.8h, v4.8h
; CHECK-SD-NEXT:    sshl v3.8h, v3.8h, v4.8h
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI16_3]
; CHECK-SD-NEXT:    and v2.16b, v2.16b, v4.16b
; CHECK-SD-NEXT:    and v3.16b, v3.16b, v4.16b
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI16_1
; CHECK-GI-NEXT:    sshr v3.8h, v0.8h, #15
; CHECK-GI-NEXT:    sshr v4.8h, v1.8h, #15
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
; CHECK-GI-NEXT:    ldr d5, [x8, :lo12:.LCPI16_0]
; CHECK-GI-NEXT:    adrp x8, .LCPI16_2
; CHECK-GI-NEXT:    neg v2.8h, v2.8h
; CHECK-GI-NEXT:    ushll v5.8h, v5.8b, #0
; CHECK-GI-NEXT:    ushl v3.8h, v3.8h, v2.8h
; CHECK-GI-NEXT:    ushl v2.8h, v4.8h, v2.8h
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI16_2]
; CHECK-GI-NEXT:    shl v5.8h, v5.8h, #15
; CHECK-GI-NEXT:    neg v4.8h, v4.8h
; CHECK-GI-NEXT:    add v3.8h, v0.8h, v3.8h
; CHECK-GI-NEXT:    add v2.8h, v1.8h, v2.8h
; CHECK-GI-NEXT:    sshl v3.8h, v3.8h, v4.8h
; CHECK-GI-NEXT:    sshl v2.8h, v2.8h, v4.8h
; CHECK-GI-NEXT:    sshr v4.8h, v5.8h, #15
; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v4.16b
; CHECK-GI-NEXT:    bif v1.16b, v2.16b, v4.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <16 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
  ret <16 x i16> %1
}

; Non-uniform power-of-two divisors, <32 x i16> (four 128-bit registers).
define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI17_1
; CHECK-SD-NEXT:    cmlt v4.8h, v0.8h, #0
; CHECK-SD-NEXT:    cmlt v5.8h, v1.8h, #0
; CHECK-SD-NEXT:    cmlt v7.8h, v2.8h, #0
; CHECK-SD-NEXT:    cmlt v16.8h, v3.8h, #0
; CHECK-SD-NEXT:    ldr q6, [x8, :lo12:.LCPI17_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI17_2
; CHECK-SD-NEXT:    ushl v4.8h, v4.8h, v6.8h
; CHECK-SD-NEXT:    ushl v5.8h, v5.8h, v6.8h
; CHECK-SD-NEXT:    ushl v7.8h, v7.8h, v6.8h
; CHECK-SD-NEXT:    ushl v6.8h, v16.8h, v6.8h
; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI17_2]
; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
; CHECK-SD-NEXT:    add v4.8h, v0.8h, v4.8h
; CHECK-SD-NEXT:    add v5.8h, v1.8h, v5.8h
; CHECK-SD-NEXT:    ldr q17, [x8, :lo12:.LCPI17_0]
; CHECK-SD-NEXT:    add v7.8h, v2.8h, v7.8h
; CHECK-SD-NEXT:    add v6.8h, v3.8h, v6.8h
; CHECK-SD-NEXT:    adrp x8, .LCPI17_3
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v17.16b
; CHECK-SD-NEXT:    and v1.16b, v1.16b, v17.16b
; CHECK-SD-NEXT:    and v2.16b, v2.16b, v17.16b
; CHECK-SD-NEXT:    sshl v4.8h, v4.8h, v16.8h
; CHECK-SD-NEXT:    sshl v5.8h, v5.8h, v16.8h
; CHECK-SD-NEXT:    and v3.16b, v3.16b, v17.16b
; CHECK-SD-NEXT:    sshl v7.8h, v7.8h, v16.8h
; CHECK-SD-NEXT:    sshl v6.8h, v6.8h, v16.8h
; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI17_3]
; CHECK-SD-NEXT:    and v4.16b, v4.16b, v16.16b
; CHECK-SD-NEXT:    and v5.16b, v5.16b, v16.16b
; CHECK-SD-NEXT:    and v7.16b, v7.16b, v16.16b
; CHECK-SD-NEXT:    and v6.16b, v6.16b, v16.16b
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v4.16b
; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v5.16b
; CHECK-SD-NEXT:    orr v2.16b, v2.16b, v7.16b
; CHECK-SD-NEXT:    orr v3.16b, v3.16b, v6.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI17_1
; CHECK-GI-NEXT:    sshr v5.8h, v0.8h, #15
; CHECK-GI-NEXT:    sshr v6.8h, v1.8h, #15
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI17_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
; CHECK-GI-NEXT:    sshr v7.8h, v2.8h, #15
; CHECK-GI-NEXT:    sshr v16.8h, v3.8h, #15
; CHECK-GI-NEXT:    ldr d17, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT:    adrp x8, .LCPI17_2
; CHECK-GI-NEXT:    neg v4.8h, v4.8h
; CHECK-GI-NEXT:    ushll v17.8h, v17.8b, #0
; CHECK-GI-NEXT:    ushl v5.8h, v5.8h, v4.8h
; CHECK-GI-NEXT:    ushl v6.8h, v6.8h, v4.8h
; CHECK-GI-NEXT:    ushl v7.8h, v7.8h, v4.8h
; CHECK-GI-NEXT:    ushl v4.8h, v16.8h, v4.8h
; CHECK-GI-NEXT:    ldr q16, [x8, :lo12:.LCPI17_2]
; CHECK-GI-NEXT:    shl v17.8h, v17.8h, #15
; CHECK-GI-NEXT:    neg v16.8h, v16.8h
; CHECK-GI-NEXT:    add v5.8h, v0.8h, v5.8h
; CHECK-GI-NEXT:    add v6.8h, v1.8h, v6.8h
; CHECK-GI-NEXT:    add v7.8h, v2.8h, v7.8h
; CHECK-GI-NEXT:    add v4.8h, v3.8h, v4.8h
; CHECK-GI-NEXT:    sshr v17.8h, v17.8h, #15
; CHECK-GI-NEXT:    sshl v5.8h, v5.8h, v16.8h
; CHECK-GI-NEXT:    sshl v6.8h, v6.8h, v16.8h
; CHECK-GI-NEXT:    sshl v7.8h, v7.8h, v16.8h
; CHECK-GI-NEXT:    sshl v4.8h, v4.8h, v16.8h
; CHECK-GI-NEXT:    bif v0.16b, v5.16b, v17.16b
; CHECK-GI-NEXT:    bif v1.16b, v6.16b, v17.16b
; CHECK-GI-NEXT:    bif v2.16b, v7.16b, v17.16b
; CHECK-GI-NEXT:    bif v3.16b, v4.16b, v17.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <32 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2, i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
  ret <32 x i16> %1
}

; Non-uniform power-of-two divisors, <4 x i32>.
define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v4i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
; CHECK-SD-NEXT:    cmlt v1.4s, v0.4s, #0
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI18_1
; CHECK-SD-NEXT:    ushl v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI18_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI18_2
; CHECK-SD-NEXT:    add v1.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    sshl v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI18_2]
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v4i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    mov w9, #0 // =0x0
; CHECK-GI-NEXT:    sshr v3.4s, v0.4s, #31
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI18_0
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
; CHECK-GI-NEXT:    adrp x8, .LCPI18_1
; CHECK-GI-NEXT:    mov v1.s[1], w9
; CHECK-GI-NEXT:    neg v2.4s, v2.4s
; CHECK-GI-NEXT:    ushl v2.4s, v3.4s, v2.4s
; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI18_1]
; CHECK-GI-NEXT:    mov v1.s[2], w9
; CHECK-GI-NEXT:    neg v3.4s, v3.4s
; CHECK-GI-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    mov v1.s[3], w9
; CHECK-GI-NEXT:    sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16>
  ret <4 x i32> %1
}

; Non-uniform power-of-two divisors, <8 x i32> (two 128-bit registers).
define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v8i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI19_0
; CHECK-SD-NEXT:    cmlt v2.4s, v0.4s, #0
; CHECK-SD-NEXT:    cmlt v3.4s, v1.4s, #0
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI19_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI19_1
; CHECK-SD-NEXT:    ushl v2.4s, v2.4s, v4.4s
; CHECK-SD-NEXT:    ushl v3.4s, v3.4s, v4.4s
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI19_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI19_2
; CHECK-SD-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-SD-NEXT:    add v3.4s, v1.4s, v3.4s
; CHECK-SD-NEXT:    sshl v2.4s, v2.4s, v4.4s
; CHECK-SD-NEXT:    sshl v3.4s, v3.4s, v4.4s
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI19_2]
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v4.16b
; CHECK-SD-NEXT:    bif v1.16b, v3.16b, v4.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    mov w9, #0 // =0x0
; CHECK-GI-NEXT:    sshr v4.4s, v0.4s, #31
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
; CHECK-GI-NEXT:    sshr v5.4s, v1.4s, #31
; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI19_0]
; CHECK-GI-NEXT:    adrp x8, .LCPI19_1
; CHECK-GI-NEXT:    mov v2.h[1], w9
; CHECK-GI-NEXT:    neg v3.4s, v3.4s
; CHECK-GI-NEXT:    ushl v4.4s, v4.4s, v3.4s
; CHECK-GI-NEXT:    ushl v3.4s, v5.4s, v3.4s
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI19_1]
; CHECK-GI-NEXT:    mov v2.h[2], w9
; CHECK-GI-NEXT:    neg v5.4s, v5.4s
; CHECK-GI-NEXT:    add v4.4s, v0.4s, v4.4s
; CHECK-GI-NEXT:    add v3.4s, v1.4s, v3.4s
; CHECK-GI-NEXT:    mov v2.h[3], w9
; CHECK-GI-NEXT:    sshl v4.4s, v4.4s, v5.4s
; CHECK-GI-NEXT:    sshl v3.4s, v3.4s, v5.4s
; CHECK-GI-NEXT:    ushll v2.4s, v2.4h, #0
; CHECK-GI-NEXT:    shl v2.4s, v2.4s, #31
; CHECK-GI-NEXT:    sshr v2.4s, v2.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v4.16b, v2.16b
; CHECK-GI-NEXT:    bif v1.16b, v3.16b, v2.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <8 x i32> %x, <i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16>
  ret <8 x i32> %1
}

; Non-uniform power-of-two divisors, <16 x i32> (four 128-bit registers).
define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v16i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI20_0
; CHECK-SD-NEXT:    cmlt v4.4s, v0.4s, #0
; CHECK-SD-NEXT:    cmlt v5.4s, v1.4s, #0
; CHECK-SD-NEXT:    cmlt v7.4s, v2.4s, #0
; CHECK-SD-NEXT:    cmlt v16.4s, v3.4s, #0
; CHECK-SD-NEXT:    ldr q6, [x8, :lo12:.LCPI20_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI20_1
; CHECK-SD-NEXT:    ushl v4.4s, v4.4s, v6.4s
; CHECK-SD-NEXT:    ushl v5.4s, v5.4s, v6.4s
; CHECK-SD-NEXT:    ushl v7.4s, v7.4s, v6.4s
; CHECK-SD-NEXT:    ushl v6.4s, v16.4s, v6.4s
; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI20_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI20_2
; CHECK-SD-NEXT:    add v4.4s, v0.4s, v4.4s
; CHECK-SD-NEXT:    add v5.4s, v1.4s, v5.4s
; CHECK-SD-NEXT:    add v7.4s, v2.4s, v7.4s
; CHECK-SD-NEXT:    add v6.4s, v3.4s, v6.4s
; CHECK-SD-NEXT:    sshl v4.4s, v4.4s, v16.4s
; CHECK-SD-NEXT:    sshl v5.4s, v5.4s, v16.4s
; CHECK-SD-NEXT:    sshl v7.4s, v7.4s, v16.4s
; CHECK-SD-NEXT:    sshl v6.4s, v6.4s, v16.4s
; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI20_2]
; CHECK-SD-NEXT:    bif v0.16b, v4.16b, v16.16b
; CHECK-SD-NEXT:    bif v1.16b, v5.16b, v16.16b
; CHECK-SD-NEXT:    bif v2.16b, v7.16b, v16.16b
; CHECK-SD-NEXT:    bif v3.16b, v6.16b, v16.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    mov w9, #0 // =0x0
; CHECK-GI-NEXT:    sshr v6.4s, v0.4s, #31
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI20_0
; CHECK-GI-NEXT:    sshr v7.4s, v1.4s, #31
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NEXT:    sshr v16.4s, v2.4s, #31
; CHECK-GI-NEXT:    sshr v17.4s, v3.4s, #31
; CHECK-GI-NEXT:    adrp x8, .LCPI20_1
; CHECK-GI-NEXT:    mov v4.h[1], w9
; CHECK-GI-NEXT:    neg v5.4s, v5.4s
; CHECK-GI-NEXT:    ushl v6.4s, v6.4s, v5.4s
; CHECK-GI-NEXT:    ushl v7.4s, v7.4s, v5.4s
; CHECK-GI-NEXT:    ushl v16.4s, v16.4s, v5.4s
; CHECK-GI-NEXT:    mov v4.h[2], w9
; CHECK-GI-NEXT:    ushl v5.4s, v17.4s, v5.4s
; CHECK-GI-NEXT:    ldr q17, [x8, :lo12:.LCPI20_1]
; CHECK-GI-NEXT:    neg v17.4s, v17.4s
; CHECK-GI-NEXT:    add v6.4s, v0.4s, v6.4s
; CHECK-GI-NEXT:    add v7.4s, v1.4s, v7.4s
; CHECK-GI-NEXT:    add v16.4s, v2.4s, v16.4s
; CHECK-GI-NEXT:    add v5.4s, v3.4s, v5.4s
; CHECK-GI-NEXT:    mov v4.h[3], w9
; CHECK-GI-NEXT:    sshl v6.4s, v6.4s, v17.4s
; CHECK-GI-NEXT:    sshl v7.4s, v7.4s, v17.4s
; CHECK-GI-NEXT:    sshl v16.4s, v16.4s, v17.4s
; CHECK-GI-NEXT:    sshl v5.4s, v5.4s, v17.4s
; CHECK-GI-NEXT:    ushll v4.4s, v4.4h, #0
; CHECK-GI-NEXT:    shl v4.4s, v4.4s, #31
; CHECK-GI-NEXT:    sshr v4.4s, v4.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v6.16b, v4.16b
; CHECK-GI-NEXT:    bif v1.16b, v7.16b, v4.16b
; CHECK-GI-NEXT:    bif v2.16b, v16.16b, v4.16b
; CHECK-GI-NEXT:    bif v3.16b, v5.16b, v4.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <16 x i32> %x, <i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16, i32 1, i32 4, i32 8, i32 16>
  ret <16 x i32> %1
}

; Non-uniform power-of-two divisors, <2 x i64>.
define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI21_0
; CHECK-SD-NEXT:    cmlt v1.2d, v0.2d, #0
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI21_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI21_1
; CHECK-SD-NEXT:    ushl v1.2d, v1.2d, v2.2d
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI21_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI21_2
; CHECK-SD-NEXT:    add v1.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    sshl v1.2d, v1.2d, v2.2d
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI21_2]
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI21_1
; CHECK-GI-NEXT:    sshr v2.2d, v0.2d, #63
; CHECK-GI-NEXT:    adrp x9, .LCPI21_0
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI21_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI21_2
; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI21_0]
; CHECK-GI-NEXT:    neg v1.2d, v1.2d
; CHECK-GI-NEXT:    shl v3.2d, v3.2d, #63
; CHECK-GI-NEXT:    ushl v1.2d, v2.2d, v1.2d
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI21_2]
; CHECK-GI-NEXT:    neg v2.2d, v2.2d
; CHECK-GI-NEXT:    add v1.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    sshl v1.2d, v1.2d, v2.2d
; CHECK-GI-NEXT:    sshr v2.2d, v3.2d, #63
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <2 x i64> %x, <i64 1, i64 4>
  ret <2 x i64> %1
}

; Non-uniform power-of-two divisors, <4 x i64> (two 128-bit registers).
define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI22_0
; CHECK-SD-NEXT:    cmlt v2.2d, v0.2d, #0
; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI22_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI22_3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI22_3]
; CHECK-SD-NEXT:    adrp x8, .LCPI22_1
; CHECK-SD-NEXT:    ushl v2.2d, v2.2d, v3.2d
; CHECK-SD-NEXT:    cmlt v3.2d, v1.2d, #0
; CHECK-SD-NEXT:    add v2.2d, v0.2d, v2.2d
; CHECK-SD-NEXT:    ushl v3.2d, v3.2d, v4.2d
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI22_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI22_2
; CHECK-SD-NEXT:    sshl v2.2d, v2.2d, v4.2d
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI22_2]
; CHECK-SD-NEXT:    add v1.2d, v1.2d, v3.2d
; CHECK-SD-NEXT:    adrp x8, .LCPI22_4
; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI22_4]
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v4.16b
; CHECK-SD-NEXT:    sshl v1.2d, v1.2d, v3.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI22_2
; CHECK-GI-NEXT:    sshr v3.2d, v0.2d, #63
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI22_2]
; CHECK-GI-NEXT:    adrp x8, .LCPI22_1
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI22_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI22_4
; CHECK-GI-NEXT:    neg v2.2d, v2.2d
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI22_4]
; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
; CHECK-GI-NEXT:    neg v4.2d, v4.2d
; CHECK-GI-NEXT:    ldr q6, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NEXT:    adrp x8, .LCPI22_3
; CHECK-GI-NEXT:    neg v5.2d, v5.2d
; CHECK-GI-NEXT:    ushl v2.2d, v3.2d, v2.2d
; CHECK-GI-NEXT:    sshr v3.2d, v1.2d, #63
; CHECK-GI-NEXT:    shl v6.2d, v6.2d, #63
; CHECK-GI-NEXT:    add v2.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    ushl v3.2d, v3.2d, v4.2d
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI22_3]
; CHECK-GI-NEXT:    sshl v2.2d, v2.2d, v5.2d
; CHECK-GI-NEXT:    sshr v5.2d, v6.2d, #63
; CHECK-GI-NEXT:    add v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT:    neg v3.2d, v4.2d
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v5.16b
; CHECK-GI-NEXT:    sshl v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT:    ret
  %1 = sdiv <4 x i64> %x, <i64 1, i64 4, i64 8, i64 16>
  ret <4 x i64> %1
}

; Non-uniform power-of-two divisors, <8 x i64> (four 128-bit registers).
define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI23_0
; CHECK-SD-NEXT:    cmlt v4.2d, v0.2d, #0
; CHECK-SD-NEXT:    cmlt v6.2d, v2.2d, #0
; CHECK-SD-NEXT:    ldr q5, [x8, :lo12:.LCPI23_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI23_3
; CHECK-SD-NEXT:    cmlt v7.2d, v3.2d, #0
; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI23_3]
; CHECK-SD-NEXT:    adrp x8, .LCPI23_1
; CHECK-SD-NEXT:    ushl v4.2d, v4.2d, v5.2d
; CHECK-SD-NEXT:    ushl v5.2d, v6.2d, v5.2d
; CHECK-SD-NEXT:    cmlt v6.2d, v1.2d, #0
; CHECK-SD-NEXT:    ldr q17, [x8, :lo12:.LCPI23_1]
; CHECK-SD-NEXT:    ushl v7.2d, v7.2d, v16.2d
; CHECK-SD-NEXT:    adrp x8, .LCPI23_2
; CHECK-SD-NEXT:    add v4.2d, v0.2d, v4.2d
; CHECK-SD-NEXT:    add v5.2d, v2.2d, v5.2d
; CHECK-SD-NEXT:    ushl v6.2d, v6.2d, v16.2d
; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI23_2]
; CHECK-SD-NEXT:    adrp x8, .LCPI23_4
; CHECK-SD-NEXT:    add v3.2d, v3.2d, v7.2d
; CHECK-SD-NEXT:    sshl v4.2d, v4.2d, v17.2d
; CHECK-SD-NEXT:    sshl v5.2d, v5.2d, v17.2d
; CHECK-SD-NEXT:    add v1.2d, v1.2d, v6.2d
; CHECK-SD-NEXT:    ldr q6, [x8, :lo12:.LCPI23_4]
; CHECK-SD-NEXT:    bif v0.16b, v4.16b, v16.16b
; CHECK-SD-NEXT:    bif v2.16b, v5.16b, v16.16b
; CHECK-SD-NEXT:    sshl v1.2d, v1.2d, v6.2d
; CHECK-SD-NEXT:    sshl v3.2d, v3.2d, v6.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    mov w9, #0 // =0x0
; CHECK-GI-NEXT:    sshr v7.2d, v0.2d, #63
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI23_1
; CHECK-GI-NEXT:    sshr v16.2d, v1.2d, #63
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI23_1]
; CHECK-GI-NEXT:    sshr v17.2d, v2.2d, #63
; CHECK-GI-NEXT:    sshr v18.2d, v3.2d, #63
; CHECK-GI-NEXT:    adrp x8, .LCPI23_3
; CHECK-GI-NEXT:    mov v4.h[1], w9
; CHECK-GI-NEXT:    neg v5.2d, v5.2d
; CHECK-GI-NEXT:    ldr q19, [x8, :lo12:.LCPI23_3]
; CHECK-GI-NEXT:    neg v19.2d, v19.2d
; CHECK-GI-NEXT:    ushl v7.2d, v7.2d, v5.2d
; CHECK-GI-NEXT:    ushl v5.2d, v17.2d, v5.2d
; CHECK-GI-NEXT:    mov v4.h[2], w9
; CHECK-GI-NEXT:    add v7.2d, v0.2d, v7.2d
; CHECK-GI-NEXT:    add v5.2d, v2.2d, v5.2d
; CHECK-GI-NEXT:    mov v4.h[3], w9
; CHECK-GI-NEXT:    adrp x9, .LCPI23_0
; CHECK-GI-NEXT:    ldr q6, [x9, :lo12:.LCPI23_0]
; CHECK-GI-NEXT:    adrp x9, .LCPI23_2
; CHECK-GI-NEXT:    sshl v7.2d, v7.2d, v19.2d
; CHECK-GI-NEXT:    ldr q20, [x9, :lo12:.LCPI23_2]
; CHECK-GI-NEXT:    sshl v5.2d, v5.2d, v19.2d
; CHECK-GI-NEXT:    neg v6.2d, v6.2d
; CHECK-GI-NEXT:    ushll v4.4s, v4.4h, #0
; CHECK-GI-NEXT:    neg v20.2d, v20.2d
; CHECK-GI-NEXT:    ushl v16.2d, v16.2d, v6.2d
; CHECK-GI-NEXT:    ushl v6.2d, v18.2d, v6.2d
; CHECK-GI-NEXT:    ushll v17.2d, v4.2s, #0
; CHECK-GI-NEXT:    ushll2 v18.2d, v4.4s, #0
; CHECK-GI-NEXT:    ushll v4.2d, v4.2s, #0
; CHECK-GI-NEXT:    add v16.2d, v1.2d, v16.2d
; CHECK-GI-NEXT:    add v6.2d, v3.2d, v6.2d
; CHECK-GI-NEXT:    shl v17.2d, v17.2d, #63
; CHECK-GI-NEXT:    shl v18.2d, v18.2d, #63
; CHECK-GI-NEXT:    shl v4.2d, v4.2d, #63
; CHECK-GI-NEXT:    sshl v16.2d, v16.2d, v20.2d
; CHECK-GI-NEXT:    sshl v6.2d, v6.2d, v20.2d
; CHECK-GI-NEXT:    sshr v17.2d, v17.2d, #63
; CHECK-GI-NEXT:    sshr v18.2d, v18.2d, #63
; CHECK-GI-NEXT:    sshr v4.2d, v4.2d, #63
; CHECK-GI-NEXT:    bif v0.16b, v7.16b, v17.16b
; CHECK-GI-NEXT:    bif v1.16b, v16.16b, v18.16b
; CHECK-GI-NEXT:    bif v2.16b, v5.16b, v4.16b
; CHECK-GI-NEXT:    bif v3.16b, v6.16b, v18.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <8 x i64> %x, <i64 1, i64 4, i64 8, i64 16, i64 1, i64 4, i64 8, i64 16>
  ret <8 x i64> %1
}

; Power-of-two magnitudes with negative divisors in lanes 1 and 3; the result
; of the positive-divisor sequence is negated in those lanes.
define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI24_0
; CHECK-SD-NEXT:    cmlt v1.4s, v0.4s, #0
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI24_1
; CHECK-SD-NEXT:    ushl v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI24_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI24_2
; CHECK-SD-NEXT:    add v1.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    sshl v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI24_2]
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    movi v1.2d, #0xffffffff00000000
; CHECK-SD-NEXT:    neg v2.4s, v0.4s
; CHECK-SD-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_PosAndNeg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    mov w9, #0 // =0x0
; CHECK-GI-NEXT:    adrp x10, .LCPI24_0
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    ldr q2, [x10, :lo12:.LCPI24_0]
; CHECK-GI-NEXT:    sshr v3.4s, v0.4s, #31
; CHECK-GI-NEXT:    fmov s4, w9
; CHECK-GI-NEXT:    adrp x10, .LCPI24_1
; CHECK-GI-NEXT:    neg v2.4s, v2.4s
; CHECK-GI-NEXT:    mov v1.s[1], w9
; CHECK-GI-NEXT:    mov v4.s[1], w8
; CHECK-GI-NEXT:    ushl v2.4s, v3.4s, v2.4s
; CHECK-GI-NEXT:    ldr q3, [x10, :lo12:.LCPI24_1]
; CHECK-GI-NEXT:    mov v1.s[2], w9
; CHECK-GI-NEXT:    neg v3.4s, v3.4s
; CHECK-GI-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    mov v4.d[1], v4.d[0]
; CHECK-GI-NEXT:    mov v1.s[3], w9
; CHECK-GI-NEXT:    sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    shl v1.4s, v4.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    neg v2.4s, v0.4s
; CHECK-GI-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %1 = sdiv <4 x i32> %x, <i32 1, i32 -4, i32 8, i32 -16>
  ret <4 x i32> %1
}

; PR37119
; Divisor lanes are a mix of -1 and 1: only the -1 lanes are negated.
define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
; CHECK-SD-LABEL: non_splat_minus_one_divisor_0:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    umov w9, v0.b[0]
; CHECK-SD-NEXT:    mov w8, wzr
; CHECK-SD-NEXT:    umov w10, v0.b[1]
; CHECK-SD-NEXT:    sub w9, w8, w9, sxtb
; CHECK-SD-NEXT:    sub w10, w8, w10, sxtb
; CHECK-SD-NEXT:    fmov s1, w9
; CHECK-SD-NEXT:    smov w9, v0.b[2]
; CHECK-SD-NEXT:    mov v1.b[1], w10
; CHECK-SD-NEXT:    umov w10, v0.b[3]
; CHECK-SD-NEXT:    mov v1.b[2], w9
; CHECK-SD-NEXT:    sub w9, w8, w10, sxtb
; CHECK-SD-NEXT:    umov w10, v0.b[4]
; CHECK-SD-NEXT:    mov v1.b[3], w9
; CHECK-SD-NEXT:    sub w9, w8, w10, sxtb
; CHECK-SD-NEXT:    umov w10, v0.b[5]
; CHECK-SD-NEXT:    mov v1.b[4], w9
; CHECK-SD-NEXT:    sub w9, w8, w10, sxtb
; CHECK-SD-NEXT:    umov w10, v0.b[7]
; CHECK-SD-NEXT:    mov v1.b[5], w9
; CHECK-SD-NEXT:    smov w9, v0.b[6]
; CHECK-SD-NEXT:    mov v1.b[6], w9
; CHECK-SD-NEXT:    sub w9, w8, w10, sxtb
; CHECK-SD-NEXT:    umov w10, v0.b[8]
; CHECK-SD-NEXT:    mov v1.b[7], w9
; CHECK-SD-NEXT:    sub w8, w8, w10, sxtb
; CHECK-SD-NEXT:    mov v1.b[8], w8
; CHECK-SD-NEXT:    smov w8, v0.b[9]
; CHECK-SD-NEXT:    mov v1.b[9], w8
; CHECK-SD-NEXT:    smov w8, v0.b[10]
; CHECK-SD-NEXT:    mov v1.b[10], w8
; CHECK-SD-NEXT:    smov w8, v0.b[11]
; CHECK-SD-NEXT:    mov v1.b[11], w8
; CHECK-SD-NEXT:    smov w8, v0.b[12]
; CHECK-SD-NEXT:    mov v1.b[12], w8
; CHECK-SD-NEXT:    smov w8, v0.b[13]
; CHECK-SD-NEXT:    mov v1.b[13], w8
; CHECK-SD-NEXT:    smov w8, v0.b[14]
; CHECK-SD-NEXT:    mov v1.b[14], w8
; CHECK-SD-NEXT:    smov w8, v0.b[15]
; CHECK-SD-NEXT:    mov v1.b[15], w8
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: non_splat_minus_one_divisor_0:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
; CHECK-GI-NEXT:    neg v2.16b, v0.16b
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI25_0]
; CHECK-GI-NEXT:    shl v1.16b, v1.16b, #7
; CHECK-GI-NEXT:    sshr v1.16b, v1.16b, #7
; CHECK-GI-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %div
}

; Divisor lanes mix -1 with powers of two (including -128).
define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; CHECK-SD-LABEL: non_splat_minus_one_divisor_1:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI26_1
; CHECK-SD-NEXT:    cmlt v1.16b, v0.16b, #0
; CHECK-SD-NEXT:    adrp x9, .LCPI26_3
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI26_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI26_2
; CHECK-SD-NEXT:    ldr q3, [x9, :lo12:.LCPI26_3]
; CHECK-SD-NEXT:    adrp x9, .LCPI26_5
; CHECK-SD-NEXT:    ushl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI26_2]
; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
; CHECK-SD-NEXT:    add v1.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    sshl v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI26_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI26_4
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI26_4]
; CHECK-SD-NEXT:    and v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT:    ldr q3, [x9, :lo12:.LCPI26_5]
; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    neg v1.16b, v0.16b
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    and v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: non_splat_minus_one_divisor_1:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI26_2
; CHECK-GI-NEXT:    sshr v2.16b, v0.16b, #7
; CHECK-GI-NEXT:    adrp x9, .LCPI26_1
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI26_2]
; CHECK-GI-NEXT:    adrp x8, .LCPI26_3
; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI26_1]
; CHECK-GI-NEXT:    neg v1.16b, v1.16b
; CHECK-GI-NEXT:    shl v3.16b, v3.16b, #7
; CHECK-GI-NEXT:    ushl v1.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI26_3]
; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
; CHECK-GI-NEXT:    neg v2.16b, v2.16b
; CHECK-GI-NEXT:    add v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    sshl v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    sshr v2.16b, v3.16b, #7
; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    shl v1.16b, v3.16b, #7
; CHECK-GI-NEXT:    sshr v1.16b, v1.16b, #7
; CHECK-GI-NEXT:    neg v2.16b, v0.16b
; CHECK-GI-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 2, i8 -1, i8 -1, i8 -1, i8 2, i8 -1, i8 -1, i8 2, i8 2, i8 2, i8 2, i8 -128, i8 2, i8 -128>
  ret <16 x i8> %div
}

; Divisor lanes are -1, 1, 2 and -2: lanes 0 and 1 skip the shift sequence,
; lanes 0 and 3 are negated.
define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) {
; CHECK-SD-LABEL: non_splat_minus_one_divisor_2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI27_0
; CHECK-SD-NEXT:    cmlt v1.4s, v0.4s, #0
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI27_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI27_1
; CHECK-SD-NEXT:    ushl v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI27_1]
; CHECK-SD-NEXT:    adrp x8, .LCPI27_2
; CHECK-SD-NEXT:    add v1.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    sshl v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI27_2]
; CHECK-SD-NEXT:    adrp x8, .LCPI27_3
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI27_3]
; CHECK-SD-NEXT:    neg v1.4s, v0.4s
; CHECK-SD-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: non_splat_minus_one_divisor_2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    adrp x9, .LCPI27_0
; CHECK-GI-NEXT:    mov w10, #0 // =0x0
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    ldr q2, [x9, :lo12:.LCPI27_0]
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    sshr v3.4s, v0.4s, #31
; CHECK-GI-NEXT:    adrp x9, .LCPI27_1
; CHECK-GI-NEXT:    neg v2.4s, v2.4s
; CHECK-GI-NEXT:    mov v1.s[1], w8
; CHECK-GI-NEXT:    mov v4.s[1], w10
; CHECK-GI-NEXT:    ushl v2.4s, v3.4s, v2.4s
; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI27_1]
; CHECK-GI-NEXT:    mov v1.s[2], w10
; CHECK-GI-NEXT:    mov v4.s[2], w10
; CHECK-GI-NEXT:    neg v3.4s, v3.4s
; CHECK-GI-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    mov v1.s[3], w10
; CHECK-GI-NEXT:    sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT:    mov v4.s[3], w8
; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    shl v1.4s, v4.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    neg v2.4s, v0.4s
; CHECK-GI-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %div = sdiv <4 x i32> %A, <i32 -1, i32 1, i32 2, i32 -2>
  ret <4 x i32> %div
}

define <8 x i16> @combine_vec_sdiv_nonuniform(<8 x i16> %x) {
; CHECK-LABEL: combine_vec_sdiv_nonuniform:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI28_0
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI28_0]
; CHECK-NEXT:    smull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
;
CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h ; CHECK-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-NEXT: ret %1 = sdiv <8 x i16> %x, ret <8 x i16> %1 } define <8 x i16> @combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; CHECK-SD-LABEL: combine_vec_sdiv_nonuniform2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI29_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI29_0] ; CHECK-SD-NEXT: adrp x8, .LCPI29_1 ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI29_1] ; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v2.8h ; CHECK-SD-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_vec_sdiv_nonuniform2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI29_1 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI29_1] ; CHECK-GI-NEXT: adrp x8, .LCPI29_0 ; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI29_0] ; CHECK-GI-NEXT: neg v1.8h, v1.8h ; CHECK-GI-NEXT: uzp2 v0.8h, v0.8h, v2.8h ; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, ret <8 x i16> %1 } define <8 x i16> @combine_vec_sdiv_nonuniform3(<8 x i16> %x) { ; CHECK-SD-LABEL: combine_vec_sdiv_nonuniform3: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI30_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] ; CHECK-SD-NEXT: adrp x8, .LCPI30_1 ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] ; CHECK-SD-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_vec_sdiv_nonuniform3: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI30_1 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] ; CHECK-GI-NEXT: adrp x8, .LCPI30_0 ; CHECK-GI-NEXT: smull2 
v2.4s, v0.8h, v1.8h ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_0] ; CHECK-GI-NEXT: add v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: neg v1.8h, v2.8h ; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, ret <8 x i16> %1 } define <8 x i16> @combine_vec_sdiv_nonuniform4(<8 x i16> %x) { ; CHECK-SD-LABEL: combine_vec_sdiv_nonuniform4: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI31_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI31_0] ; CHECK-SD-NEXT: adrp x8, .LCPI31_1 ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: sub v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI31_1] ; CHECK-SD-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_vec_sdiv_nonuniform4: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI31_1 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI31_1] ; CHECK-GI-NEXT: adrp x8, .LCPI31_0 ; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_0] ; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: neg v1.8h, v2.8h ; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, ret <8 x i16> %1 } define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; CHECK-SD-LABEL: combine_vec_sdiv_nonuniform5: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI32_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI32_0] ; CHECK-SD-NEXT: adrp x8, .LCPI32_1 ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI32_1] ; CHECK-SD-NEXT: adrp x8, .LCPI32_2 ; 
CHECK-SD-NEXT: mla v1.8h, v0.8h, v2.8h ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI32_2] ; CHECK-SD-NEXT: sshl v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_vec_sdiv_nonuniform5: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI32_2 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI32_2] ; CHECK-GI-NEXT: adrp x8, .LCPI32_1 ; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_1] ; CHECK-GI-NEXT: adrp x8, .LCPI32_0 ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI32_0] ; CHECK-GI-NEXT: mla v1.8h, v0.8h, v2.8h ; CHECK-GI-NEXT: neg v0.8h, v3.8h ; CHECK-GI-NEXT: sshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: usra v0.8h, v0.8h, #15 ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, ret <8 x i16> %1 } define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; CHECK-SD-LABEL: combine_vec_sdiv_nonuniform6: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI33_0 ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI33_0] ; CHECK-SD-NEXT: adrp x8, .LCPI33_1 ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_1] ; CHECK-SD-NEXT: adrp x8, .LCPI33_2 ; CHECK-SD-NEXT: mla v1.8h, v0.8h, v2.8h ; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI33_2] ; CHECK-SD-NEXT: adrp x8, .LCPI33_3 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_3] ; CHECK-SD-NEXT: sshl v0.8h, v1.8h, v0.8h ; CHECK-SD-NEXT: ushr v1.8h, v0.8h, #15 ; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_vec_sdiv_nonuniform6: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI33_3 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI33_3] ; CHECK-GI-NEXT: adrp x8, .LCPI33_2 ; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, 
v2.8h ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_2] ; CHECK-GI-NEXT: adrp x8, .LCPI33_1 ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI33_1] ; CHECK-GI-NEXT: adrp x8, .LCPI33_0 ; CHECK-GI-NEXT: mla v1.8h, v0.8h, v2.8h ; CHECK-GI-NEXT: neg v0.8h, v3.8h ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] ; CHECK-GI-NEXT: sshl v0.8h, v1.8h, v0.8h ; CHECK-GI-NEXT: ushr v1.8h, v0.8h, #15 ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, ret <8 x i16> %1 } define <8 x i16> @combine_vec_sdiv_nonuniform7(<8 x i16> %x) { ; CHECK-SD-LABEL: combine_vec_sdiv_nonuniform7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: umov w9, v0.h[0] ; CHECK-SD-NEXT: mov w8, wzr ; CHECK-SD-NEXT: umov w10, v0.h[1] ; CHECK-SD-NEXT: umov w11, v0.h[2] ; CHECK-SD-NEXT: sub w9, w8, w9, sxth ; CHECK-SD-NEXT: sub w10, w8, w10, sxth ; CHECK-SD-NEXT: fmov s1, w9 ; CHECK-SD-NEXT: sub w9, w8, w11, sxth ; CHECK-SD-NEXT: mov v1.h[1], w10 ; CHECK-SD-NEXT: umov w10, v0.h[3] ; CHECK-SD-NEXT: mov v1.h[2], w9 ; CHECK-SD-NEXT: sub w8, w8, w10, sxth ; CHECK-SD-NEXT: mov v1.h[3], w8 ; CHECK-SD-NEXT: smov w8, v0.h[4] ; CHECK-SD-NEXT: mov v1.h[4], w8 ; CHECK-SD-NEXT: smov w8, v0.h[5] ; CHECK-SD-NEXT: mov v1.h[5], w8 ; CHECK-SD-NEXT: smov w8, v0.h[6] ; CHECK-SD-NEXT: mov v1.h[6], w8 ; CHECK-SD-NEXT: smov w8, v0.h[7] ; CHECK-SD-NEXT: mov v1.h[7], w8 ; CHECK-SD-NEXT: mov v0.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_vec_sdiv_nonuniform7: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI34_0 ; CHECK-GI-NEXT: neg v2.8h, v0.8h ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-GI-NEXT: shl v1.8h, v1.8h, #15 ; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15 ; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i16> %x, ret <8 x i16> %1 } define <16 x i8> @pr38658(<16 x i8> %x) { ; CHECK-SD-LABEL: pr38658: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI35_0 ; CHECK-SD-NEXT: ldr 
q1, [x8, :lo12:.LCPI35_0] ; CHECK-SD-NEXT: adrp x8, .LCPI35_1 ; CHECK-SD-NEXT: smull2 v2.8h, v0.16b, v1.16b ; CHECK-SD-NEXT: smull v1.8h, v0.8b, v1.8b ; CHECK-SD-NEXT: uzp2 v1.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: add v0.16b, v1.16b, v0.16b ; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI35_1] ; CHECK-SD-NEXT: sshl v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: movi v1.2d, #0000000000000000 ; CHECK-SD-NEXT: ushr v2.16b, v0.16b, #7 ; CHECK-SD-NEXT: mov v1.b[15], v2.b[15] ; CHECK-SD-NEXT: add v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: pr38658: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI35_2 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI35_2] ; CHECK-GI-NEXT: adrp x8, .LCPI35_1 ; CHECK-GI-NEXT: smull2 v2.8h, v0.16b, v1.16b ; CHECK-GI-NEXT: smull v1.8h, v0.8b, v1.8b ; CHECK-GI-NEXT: uzp2 v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_1] ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 ; CHECK-GI-NEXT: add v0.16b, v1.16b, v0.16b ; CHECK-GI-NEXT: neg v1.16b, v2.16b ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NEXT: sshl v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ushr v1.16b, v0.16b, #7 ; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret %1 = sdiv <16 x i8> %x, ret <16 x i8> %1 } define i1 @bool_sdiv(i1 %x, i1 %y) { ; CHECK-SD-LABEL: bool_sdiv: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: and w0, w0, #0x1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: bool_sdiv: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sbfx w8, w0, #0, #1 ; CHECK-GI-NEXT: sbfx w9, w1, #0, #1 ; CHECK-GI-NEXT: sdiv w8, w8, w9 ; CHECK-GI-NEXT: and w0, w8, #0x1 ; CHECK-GI-NEXT: ret %r = sdiv i1 %x, %y ret i1 %r } define <4 x i1> @boolvec_sdiv(<4 x i1> %x, <4 x i1> %y) { ; CHECK-SD-LABEL: boolvec_sdiv: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: boolvec_sdiv: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: umov w8, v0.h[0] ; 
CHECK-GI-NEXT: umov w9, v1.h[0] ; CHECK-GI-NEXT: umov w10, v1.h[1] ; CHECK-GI-NEXT: umov w11, v1.h[2] ; CHECK-GI-NEXT: umov w12, v1.h[3] ; CHECK-GI-NEXT: sbfx w8, w8, #0, #1 ; CHECK-GI-NEXT: sbfx w9, w9, #0, #1 ; CHECK-GI-NEXT: sbfx w10, w10, #0, #1 ; CHECK-GI-NEXT: sbfx w11, w11, #0, #1 ; CHECK-GI-NEXT: sbfx w12, w12, #0, #1 ; CHECK-GI-NEXT: sdiv w8, w8, w9 ; CHECK-GI-NEXT: umov w9, v0.h[1] ; CHECK-GI-NEXT: sbfx w9, w9, #0, #1 ; CHECK-GI-NEXT: sdiv w9, w9, w10 ; CHECK-GI-NEXT: umov w10, v0.h[2] ; CHECK-GI-NEXT: sbfx w10, w10, #0, #1 ; CHECK-GI-NEXT: sdiv w10, w10, w11 ; CHECK-GI-NEXT: umov w11, v0.h[3] ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: mov v0.h[1], w9 ; CHECK-GI-NEXT: sbfx w11, w11, #0, #1 ; CHECK-GI-NEXT: sdiv w8, w11, w12 ; CHECK-GI-NEXT: mov v0.h[2], w10 ; CHECK-GI-NEXT: mov v0.h[3], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %r = sdiv <4 x i1> %x, %y ret <4 x i1> %r } define i32 @combine_sdiv_two(i32 %x) { ; CHECK-SD-LABEL: combine_sdiv_two: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: add w8, w0, w0, lsr #31 ; CHECK-SD-NEXT: asr w0, w8, #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_sdiv_two: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: asr w8, w0, #31 ; CHECK-GI-NEXT: add w8, w0, w8, lsr #31 ; CHECK-GI-NEXT: asr w0, w8, #1 ; CHECK-GI-NEXT: ret %1 = sdiv i32 %x, 2 ret i32 %1 } define i32 @combine_sdiv_negtwo(i32 %x) { ; CHECK-SD-LABEL: combine_sdiv_negtwo: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: add w8, w0, w0, lsr #31 ; CHECK-SD-NEXT: neg w0, w8, asr #1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_sdiv_negtwo: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: asr w8, w0, #31 ; CHECK-GI-NEXT: add w8, w0, w8, lsr #31 ; CHECK-GI-NEXT: neg w0, w8, asr #1 ; CHECK-GI-NEXT: ret %1 = sdiv i32 %x, -2 ret i32 %1 } define i8 @combine_i8_sdiv_pow2(i8 %x) { ; CHECK-SD-LABEL: combine_i8_sdiv_pow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxtb w8, w0 ; CHECK-SD-NEXT: ubfx w8, w8, #11, #4 ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: sbfx 
w0, w8, #4, #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i8_sdiv_pow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sbfx w8, w0, #7, #1 ; CHECK-GI-NEXT: ubfx w8, w8, #4, #4 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: sbfx w0, w8, #4, #4 ; CHECK-GI-NEXT: ret %1 = sdiv i8 %x, 16 ret i8 %1 } define i8 @combine_i8_sdiv_negpow2(i8 %x) { ; CHECK-SD-LABEL: combine_i8_sdiv_negpow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxtb w8, w0 ; CHECK-SD-NEXT: ubfx w8, w8, #9, #6 ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: sxtb w8, w8 ; CHECK-SD-NEXT: neg w0, w8, asr #6 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i8_sdiv_negpow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sbfx w8, w0, #7, #1 ; CHECK-GI-NEXT: ubfx w8, w8, #2, #6 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: sxtb w8, w8 ; CHECK-GI-NEXT: neg w0, w8, asr #6 ; CHECK-GI-NEXT: ret %1 = sdiv i8 %x, -64 ret i8 %1 } define i16 @combine_i16_sdiv_pow2(i16 %x) { ; CHECK-SD-LABEL: combine_i16_sdiv_pow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxth w8, w0 ; CHECK-SD-NEXT: ubfx w8, w8, #27, #4 ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: sbfx w0, w8, #4, #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i16_sdiv_pow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sbfx w8, w0, #15, #1 ; CHECK-GI-NEXT: ubfx w8, w8, #12, #4 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: sbfx w0, w8, #4, #12 ; CHECK-GI-NEXT: ret %1 = sdiv i16 %x, 16 ret i16 %1 } define i16 @combine_i16_sdiv_negpow2(i16 %x) { ; CHECK-SD-LABEL: combine_i16_sdiv_negpow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxth w8, w0 ; CHECK-SD-NEXT: lsr w8, w8, #23 ; CHECK-SD-NEXT: add w8, w0, w8, uxtb ; CHECK-SD-NEXT: sxth w8, w8 ; CHECK-SD-NEXT: neg w0, w8, asr #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i16_sdiv_negpow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sbfx w8, w0, #15, #1 ; CHECK-GI-NEXT: ubfx w8, w8, #8, #8 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: sxth w8, w8 ; CHECK-GI-NEXT: neg w0, w8, asr #8 ; CHECK-GI-NEXT: ret %1 = sdiv i16 %x, -256 ret 
i16 %1 } define i32 @combine_i32_sdiv_pow2(i32 %x) { ; CHECK-SD-LABEL: combine_i32_sdiv_pow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: add w8, w0, #15 ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w8, w8, w0, lt ; CHECK-SD-NEXT: asr w0, w8, #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i32_sdiv_pow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: asr w8, w0, #31 ; CHECK-GI-NEXT: add w8, w0, w8, lsr #28 ; CHECK-GI-NEXT: asr w0, w8, #4 ; CHECK-GI-NEXT: ret %1 = sdiv i32 %x, 16 ret i32 %1 } define i32 @combine_i32_sdiv_negpow2(i32 %x) { ; CHECK-SD-LABEL: combine_i32_sdiv_negpow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: add w8, w0, #255 ; CHECK-SD-NEXT: cmp w0, #0 ; CHECK-SD-NEXT: csel w8, w8, w0, lt ; CHECK-SD-NEXT: neg w0, w8, asr #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i32_sdiv_negpow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: asr w8, w0, #31 ; CHECK-GI-NEXT: add w8, w0, w8, lsr #24 ; CHECK-GI-NEXT: neg w0, w8, asr #8 ; CHECK-GI-NEXT: ret %1 = sdiv i32 %x, -256 ret i32 %1 } define i64 @combine_i64_sdiv_pow2(i64 %x) { ; CHECK-SD-LABEL: combine_i64_sdiv_pow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: add x8, x0, #15 ; CHECK-SD-NEXT: cmp x0, #0 ; CHECK-SD-NEXT: csel x8, x8, x0, lt ; CHECK-SD-NEXT: asr x0, x8, #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i64_sdiv_pow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: asr x8, x0, #63 ; CHECK-GI-NEXT: add x8, x0, x8, lsr #60 ; CHECK-GI-NEXT: asr x0, x8, #4 ; CHECK-GI-NEXT: ret %1 = sdiv i64 %x, 16 ret i64 %1 } define i64 @combine_i64_sdiv_negpow2(i64 %x) { ; CHECK-SD-LABEL: combine_i64_sdiv_negpow2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: add x8, x0, #255 ; CHECK-SD-NEXT: cmp x0, #0 ; CHECK-SD-NEXT: csel x8, x8, x0, lt ; CHECK-SD-NEXT: neg x0, x8, asr #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i64_sdiv_negpow2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: asr x8, x0, #63 ; CHECK-GI-NEXT: add x8, x0, x8, lsr #56 ; CHECK-GI-NEXT: neg x0, x8, asr #8 ; CHECK-GI-NEXT: ret %1 = sdiv i64 %x, -256 ret i64 %1 } define i5 
@combine_i5_sdiv_const7(i5 %x) { ; CHECK-SD-LABEL: combine_i5_sdiv_const7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493 ; CHECK-SD-NEXT: sbfx x9, x0, #0, #5 ; CHECK-SD-NEXT: movk x8, #37449, lsl #16 ; CHECK-SD-NEXT: smull x8, w9, w8 ; CHECK-SD-NEXT: lsl w9, w0, #27 ; CHECK-SD-NEXT: lsr x8, x8, #32 ; CHECK-SD-NEXT: add w8, w8, w9, asr #27 ; CHECK-SD-NEXT: asr w9, w8, #2 ; CHECK-SD-NEXT: add w0, w9, w8, lsr #31 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i5_sdiv_const7: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #19 // =0x13 ; CHECK-GI-NEXT: sbfx w9, w0, #0, #5 ; CHECK-GI-NEXT: sbfx w8, w8, #0, #5 ; CHECK-GI-NEXT: mul w8, w9, w8 ; CHECK-GI-NEXT: sbfx w8, w8, #0, #10 ; CHECK-GI-NEXT: add w8, w0, w8, asr #5 ; CHECK-GI-NEXT: sbfx w8, w8, #0, #5 ; CHECK-GI-NEXT: asr w8, w8, #2 ; CHECK-GI-NEXT: ubfx w9, w8, #4, #1 ; CHECK-GI-NEXT: add w0, w8, w9 ; CHECK-GI-NEXT: ret %1 = sdiv i5 %x, 7 ret i5 %1 } define i5 @combine_i5_sdiv_const100(i5 %x) { ; CHECK-SD-LABEL: combine_i5_sdiv_const100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sbfx w8, w0, #4, #1 ; CHECK-SD-NEXT: and w8, w8, #0x3 ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: sbfx w0, w8, #2, #3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i5_sdiv_const100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sbfx w8, w0, #0, #5 ; CHECK-GI-NEXT: asr w8, w8, #4 ; CHECK-GI-NEXT: ubfx w8, w8, #3, #2 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: sbfx w8, w8, #0, #5 ; CHECK-GI-NEXT: asr w0, w8, #2 ; CHECK-GI-NEXT: ret %1 = sdiv i5 %x, 100 ret i5 %1 } define i8 @combine_i8_sdiv_const7(i8 %x) { ; CHECK-SD-LABEL: combine_i8_sdiv_const7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxtb w8, w0 ; CHECK-SD-NEXT: mov w9, #-109 // =0xffffff93 ; CHECK-SD-NEXT: mul w8, w8, w9 ; CHECK-SD-NEXT: add w8, w0, w8, lsr #8 ; CHECK-SD-NEXT: sbfx w9, w8, #2, #6 ; CHECK-SD-NEXT: and w8, w8, #0x80 ; CHECK-SD-NEXT: add w0, w9, w8, lsr #7 ; CHECK-SD-NEXT: ret ; ; 
CHECK-GI-LABEL: combine_i8_sdiv_const7: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sxtb w8, w0 ; CHECK-GI-NEXT: mov w9, #-109 // =0xffffff93 ; CHECK-GI-NEXT: mul w8, w8, w9 ; CHECK-GI-NEXT: sxth w8, w8 ; CHECK-GI-NEXT: add w8, w0, w8, asr #8 ; CHECK-GI-NEXT: sbfx w8, w8, #2, #6 ; CHECK-GI-NEXT: ubfx w9, w8, #7, #1 ; CHECK-GI-NEXT: add w0, w8, w9 ; CHECK-GI-NEXT: ret %1 = sdiv i8 %x, 7 ret i8 %1 } define i8 @combine_i8_sdiv_const100(i8 %x) { ; CHECK-SD-LABEL: combine_i8_sdiv_const100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxtb w8, w0 ; CHECK-SD-NEXT: mov w9, #41 // =0x29 ; CHECK-SD-NEXT: mul w8, w8, w9 ; CHECK-SD-NEXT: asr w9, w8, #12 ; CHECK-SD-NEXT: add w0, w9, w8, lsr #31 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i8_sdiv_const100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sxtb w8, w0 ; CHECK-GI-NEXT: mov w9, #41 // =0x29 ; CHECK-GI-NEXT: mul w8, w8, w9 ; CHECK-GI-NEXT: sbfx w8, w8, #8, #8 ; CHECK-GI-NEXT: asr w8, w8, #4 ; CHECK-GI-NEXT: ubfx w9, w8, #7, #1 ; CHECK-GI-NEXT: add w0, w8, w9 ; CHECK-GI-NEXT: ret %1 = sdiv i8 %x, 100 ret i8 %1 } define i16 @combine_i16_sdiv_const7(i16 %x) { ; CHECK-SD-LABEL: combine_i16_sdiv_const7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxth w8, w0 ; CHECK-SD-NEXT: mov w9, #18725 // =0x4925 ; CHECK-SD-NEXT: mul w8, w8, w9 ; CHECK-SD-NEXT: asr w9, w8, #17 ; CHECK-SD-NEXT: add w0, w9, w8, lsr #31 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i16_sdiv_const7: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sxth w8, w0 ; CHECK-GI-NEXT: mov w9, #18725 // =0x4925 ; CHECK-GI-NEXT: mul w8, w8, w9 ; CHECK-GI-NEXT: asr w8, w8, #16 ; CHECK-GI-NEXT: asr w8, w8, #1 ; CHECK-GI-NEXT: ubfx w9, w8, #15, #1 ; CHECK-GI-NEXT: add w0, w8, w9 ; CHECK-GI-NEXT: ret %1 = sdiv i16 %x, 7 ret i16 %1 } define i16 @combine_i16_sdiv_const100(i16 %x) { ; CHECK-SD-LABEL: combine_i16_sdiv_const100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sxth w8, w0 ; CHECK-SD-NEXT: mov w9, #5243 // =0x147b ; CHECK-SD-NEXT: mul w8, w8, w9 ; CHECK-SD-NEXT: asr w9, w8, #19 ; CHECK-SD-NEXT: 
add w0, w9, w8, lsr #31 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i16_sdiv_const100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sxth w8, w0 ; CHECK-GI-NEXT: mov w9, #5243 // =0x147b ; CHECK-GI-NEXT: mul w8, w8, w9 ; CHECK-GI-NEXT: asr w8, w8, #16 ; CHECK-GI-NEXT: asr w8, w8, #3 ; CHECK-GI-NEXT: ubfx w9, w8, #15, #1 ; CHECK-GI-NEXT: add w0, w8, w9 ; CHECK-GI-NEXT: ret %1 = sdiv i16 %x, 100 ret i16 %1 } define i32 @combine_i32_sdiv_const7(i32 %x) { ; CHECK-SD-LABEL: combine_i32_sdiv_const7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #9363 // =0x2493 ; CHECK-SD-NEXT: movk w8, #37449, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 ; CHECK-SD-NEXT: lsr x8, x8, #32 ; CHECK-SD-NEXT: add w8, w8, w0 ; CHECK-SD-NEXT: asr w9, w8, #2 ; CHECK-SD-NEXT: add w0, w9, w8, lsr #31 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i32_sdiv_const7: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #9363 // =0x2493 ; CHECK-GI-NEXT: movk w8, #37449, lsl #16 ; CHECK-GI-NEXT: smull x8, w0, w8 ; CHECK-GI-NEXT: asr x8, x8, #32 ; CHECK-GI-NEXT: add w8, w8, w0 ; CHECK-GI-NEXT: asr w8, w8, #2 ; CHECK-GI-NEXT: add w0, w8, w8, lsr #31 ; CHECK-GI-NEXT: ret %1 = sdiv i32 %x, 7 ret i32 %1 } define i32 @combine_i32_sdiv_const100(i32 %x) { ; CHECK-SD-LABEL: combine_i32_sdiv_const100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 ; CHECK-SD-NEXT: asr x8, x8, #37 ; CHECK-SD-NEXT: add w0, w8, w8, lsr #31 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i32_sdiv_const100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #34079 // =0x851f ; CHECK-GI-NEXT: movk w8, #20971, lsl #16 ; CHECK-GI-NEXT: smull x8, w0, w8 ; CHECK-GI-NEXT: asr x8, x8, #32 ; CHECK-GI-NEXT: asr w8, w8, #5 ; CHECK-GI-NEXT: add w0, w8, w8, lsr #31 ; CHECK-GI-NEXT: ret %1 = sdiv i32 %x, 100 ret i32 %1 } define i64 @combine_i64_sdiv_const7(i64 %x) { ; CHECK-SD-LABEL: combine_i64_sdiv_const7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov x8, #18725 // 
=0x4925 ; CHECK-SD-NEXT: movk x8, #9362, lsl #16 ; CHECK-SD-NEXT: movk x8, #37449, lsl #32 ; CHECK-SD-NEXT: movk x8, #18724, lsl #48 ; CHECK-SD-NEXT: smulh x8, x0, x8 ; CHECK-SD-NEXT: asr x9, x8, #1 ; CHECK-SD-NEXT: add x0, x9, x8, lsr #63 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i64_sdiv_const7: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov x8, #18725 // =0x4925 ; CHECK-GI-NEXT: movk x8, #9362, lsl #16 ; CHECK-GI-NEXT: movk x8, #37449, lsl #32 ; CHECK-GI-NEXT: movk x8, #18724, lsl #48 ; CHECK-GI-NEXT: smulh x8, x0, x8 ; CHECK-GI-NEXT: asr x8, x8, #1 ; CHECK-GI-NEXT: add x0, x8, x8, lsr #63 ; CHECK-GI-NEXT: ret %1 = sdiv i64 %x, 7 ret i64 %1 } define i64 @combine_i64_sdiv_const100(i64 %x) { ; CHECK-SD-LABEL: combine_i64_sdiv_const100: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov x8, #55051 // =0xd70b ; CHECK-SD-NEXT: movk x8, #28835, lsl #16 ; CHECK-SD-NEXT: movk x8, #2621, lsl #32 ; CHECK-SD-NEXT: movk x8, #41943, lsl #48 ; CHECK-SD-NEXT: smulh x8, x0, x8 ; CHECK-SD-NEXT: add x8, x8, x0 ; CHECK-SD-NEXT: asr x9, x8, #6 ; CHECK-SD-NEXT: add x0, x9, x8, lsr #63 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i64_sdiv_const100: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov x8, #55051 // =0xd70b ; CHECK-GI-NEXT: movk x8, #28835, lsl #16 ; CHECK-GI-NEXT: movk x8, #2621, lsl #32 ; CHECK-GI-NEXT: movk x8, #41943, lsl #48 ; CHECK-GI-NEXT: smulh x8, x0, x8 ; CHECK-GI-NEXT: add x8, x8, x0 ; CHECK-GI-NEXT: asr x8, x8, #6 ; CHECK-GI-NEXT: add x0, x8, x8, lsr #63 ; CHECK-GI-NEXT: ret %1 = sdiv i64 %x, 100 ret i64 %1 } define i128 @combine_i128_sdiv_const7(i128 %x) { ; CHECK-LABEL: combine_i128_sdiv_const7: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w2, #7 // =0x7 ; CHECK-NEXT: mov x3, xzr ; CHECK-NEXT: bl __divti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %1 = sdiv i128 %x, 7 ret i128 %1 } define i128 @combine_i128_sdiv_const100(i128 %x) { ; CHECK-LABEL: combine_i128_sdiv_const100: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: mov w2, #100 // =0x64 ; CHECK-NEXT: mov x3, xzr ; CHECK-NEXT: bl __divti3 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %1 = sdiv i128 %x, 100 ret i128 %1 }