; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Remainder-by-constant code generation for AArch64.
;
; Each function computes srem or urem of its first argument by the constant
; 7 or 100; the second argument is never used. The same input is compiled
; twice (global-isel off, then on) and the two outputs are checked under
; separate prefixes, falling back to the shared prefix when both agree.
;
; The assertion lines are generated. Regenerate them with the script named in
; the NOTE line instead of editing them by hand.

; --- Scalar i8 ---------------------------------------------------------------

; Signed i8 remainder by 7.
define i8 @si8_7(i8 %a, i8 %b) {
; CHECK-SD-LABEL: si8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sxtb w8, w0
; CHECK-SD-NEXT:    mov w9, #-109 // =0xffffff93
; CHECK-SD-NEXT:    mul w8, w8, w9
; CHECK-SD-NEXT:    add w8, w0, w8, lsr #8
; CHECK-SD-NEXT:    sbfx w9, w8, #2, #6
; CHECK-SD-NEXT:    and w8, w8, #0x80
; CHECK-SD-NEXT:    add w8, w9, w8, lsr #7
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    add w0, w0, w8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si8_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxtb w8, w0
; CHECK-GI-NEXT:    mov w9, #-109 // =0xffffff93
; CHECK-GI-NEXT:    mul w8, w8, w9
; CHECK-GI-NEXT:    sxth w8, w8
; CHECK-GI-NEXT:    add w8, w0, w8, asr #8
; CHECK-GI-NEXT:    sbfx w8, w8, #2, #6
; CHECK-GI-NEXT:    ubfx w9, w8, #7, #1
; CHECK-GI-NEXT:    add w8, w8, w9
; CHECK-GI-NEXT:    lsl w9, w8, #3
; CHECK-GI-NEXT:    sub w8, w9, w8
; CHECK-GI-NEXT:    sub w0, w0, w8
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i8 %a, 7
  ret i8 %s
}

; Signed i8 remainder by 100.
define i8 @si8_100(i8 %a, i8 %b) {
; CHECK-SD-LABEL: si8_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sxtb w8, w0
; CHECK-SD-NEXT:    mov w9, #41 // =0x29
; CHECK-SD-NEXT:    mul w8, w8, w9
; CHECK-SD-NEXT:    asr w9, w8, #12
; CHECK-SD-NEXT:    add w8, w9, w8, lsr #31
; CHECK-SD-NEXT:    mov w9, #100 // =0x64
; CHECK-SD-NEXT:    msub w0, w8, w9, w0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si8_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxtb w8, w0
; CHECK-GI-NEXT:    mov w9, #41 // =0x29
; CHECK-GI-NEXT:    mul w8, w8, w9
; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
; CHECK-GI-NEXT:    asr w8, w8, #4
; CHECK-GI-NEXT:    ubfx w9, w8, #7, #1
; CHECK-GI-NEXT:    add w8, w8, w9
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    msub w0, w8, w9, w0
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i8 %a, 100
  ret i8 %s
}

; Unsigned i8 remainder by 7.
define i8 @ui8_7(i8 %a, i8 %b) {
; CHECK-SD-LABEL: ui8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w8, #37 // =0x25
; CHECK-SD-NEXT:    and w9, w0, #0xff
; CHECK-SD-NEXT:    mul w8, w9, w8
; CHECK-SD-NEXT:    lsr w8, w8, #8
; CHECK-SD-NEXT:    sub w9, w0, w8
; CHECK-SD-NEXT:    and w9, w9, #0xfe
; CHECK-SD-NEXT:    add w8, w8, w9, lsr #1
; CHECK-SD-NEXT:    lsr w8, w8, #2
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    add w0, w0, w8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui8_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #37 // =0x25
; CHECK-GI-NEXT:    and w9, w0, #0xff
; CHECK-GI-NEXT:    mul w8, w9, w8
; CHECK-GI-NEXT:    lsr w8, w8, #8
; CHECK-GI-NEXT:    sub w9, w0, w8
; CHECK-GI-NEXT:    ubfx w9, w9, #1, #7
; CHECK-GI-NEXT:    add w8, w9, w8
; CHECK-GI-NEXT:    ubfx w8, w8, #2, #6
; CHECK-GI-NEXT:    lsl w9, w8, #3
; CHECK-GI-NEXT:    sub w8, w9, w8
; CHECK-GI-NEXT:    sub w0, w0, w8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i8 %a, 7
  ret i8 %s
}

; Unsigned i8 remainder by 100.
define i8 @ui8_100(i8 %a, i8 %b) {
; CHECK-SD-LABEL: ui8_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w8, #41 // =0x29
; CHECK-SD-NEXT:    and w9, w0, #0xff
; CHECK-SD-NEXT:    mul w8, w9, w8
; CHECK-SD-NEXT:    mov w9, #100 // =0x64
; CHECK-SD-NEXT:    lsr w8, w8, #12
; CHECK-SD-NEXT:    msub w0, w8, w9, w0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui8_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #41 // =0x29
; CHECK-GI-NEXT:    and w9, w0, #0xff
; CHECK-GI-NEXT:    mul w8, w9, w8
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    lsr w8, w8, #8
; CHECK-GI-NEXT:    lsr w8, w8, #4
; CHECK-GI-NEXT:    msub w0, w8, w9, w0
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i8 %a, 100
  ret i8 %s
}

; --- Scalar i16 --------------------------------------------------------------

; Signed i16 remainder by 7.
define i16 @si16_7(i16 %a, i16 %b) {
; CHECK-SD-LABEL: si16_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sxth w8, w0
; CHECK-SD-NEXT:    mov w9, #18725 // =0x4925
; CHECK-SD-NEXT:    mul w8, w8, w9
; CHECK-SD-NEXT:    asr w9, w8, #17
; CHECK-SD-NEXT:    add w8, w9, w8, lsr #31
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    add w0, w0, w8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si16_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxth w8, w0
; CHECK-GI-NEXT:    mov w9, #18725 // =0x4925
; CHECK-GI-NEXT:    mul w8, w8, w9
; CHECK-GI-NEXT:    asr w8, w8, #16
; CHECK-GI-NEXT:    asr w8, w8, #1
; CHECK-GI-NEXT:    ubfx w9, w8, #15, #1
; CHECK-GI-NEXT:    add w8, w8, w9
; CHECK-GI-NEXT:    lsl w9, w8, #3
; CHECK-GI-NEXT:    sub w8, w9, w8
; CHECK-GI-NEXT:    sub w0, w0, w8
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i16 %a, 7
  ret i16 %s
}

; Signed i16 remainder by 100.
define i16 @si16_100(i16 %a, i16 %b) {
; CHECK-SD-LABEL: si16_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sxth w8, w0
; CHECK-SD-NEXT:    mov w9, #5243 // =0x147b
; CHECK-SD-NEXT:    mul w8, w8, w9
; CHECK-SD-NEXT:    asr w9, w8, #19
; CHECK-SD-NEXT:    add w8, w9, w8, lsr #31
; CHECK-SD-NEXT:    mov w9, #100 // =0x64
; CHECK-SD-NEXT:    msub w0, w8, w9, w0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si16_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxth w8, w0
; CHECK-GI-NEXT:    mov w9, #5243 // =0x147b
; CHECK-GI-NEXT:    mul w8, w8, w9
; CHECK-GI-NEXT:    asr w8, w8, #16
; CHECK-GI-NEXT:    asr w8, w8, #3
; CHECK-GI-NEXT:    ubfx w9, w8, #15, #1
; CHECK-GI-NEXT:    add w8, w8, w9
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    msub w0, w8, w9, w0
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i16 %a, 100
  ret i16 %s
}

; Unsigned i16 remainder by 7.
define i16 @ui16_7(i16 %a, i16 %b) {
; CHECK-SD-LABEL: ui16_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w8, #9363 // =0x2493
; CHECK-SD-NEXT:    and w9, w0, #0xffff
; CHECK-SD-NEXT:    mul w8, w9, w8
; CHECK-SD-NEXT:    lsr w8, w8, #16
; CHECK-SD-NEXT:    sub w9, w0, w8
; CHECK-SD-NEXT:    and w9, w9, #0xfffe
; CHECK-SD-NEXT:    add w8, w8, w9, lsr #1
; CHECK-SD-NEXT:    lsr w8, w8, #2
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    add w0, w0, w8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui16_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #9363 // =0x2493
; CHECK-GI-NEXT:    and w9, w0, #0xffff
; CHECK-GI-NEXT:    mul w8, w9, w8
; CHECK-GI-NEXT:    lsr w8, w8, #16
; CHECK-GI-NEXT:    sub w9, w0, w8
; CHECK-GI-NEXT:    ubfx w9, w9, #1, #15
; CHECK-GI-NEXT:    add w8, w9, w8
; CHECK-GI-NEXT:    ubfx w8, w8, #2, #14
; CHECK-GI-NEXT:    lsl w9, w8, #3
; CHECK-GI-NEXT:    sub w8, w9, w8
; CHECK-GI-NEXT:    sub w0, w0, w8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i16 %a, 7
  ret i16 %s
}

; Unsigned i16 remainder by 100.
define i16 @ui16_100(i16 %a, i16 %b) {
; CHECK-SD-LABEL: ui16_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ubfx w8, w0, #2, #14
; CHECK-SD-NEXT:    mov w9, #5243 // =0x147b
; CHECK-SD-NEXT:    mul w8, w8, w9
; CHECK-SD-NEXT:    mov w9, #100 // =0x64
; CHECK-SD-NEXT:    lsr w8, w8, #17
; CHECK-SD-NEXT:    msub w0, w8, w9, w0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui16_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ubfx w8, w0, #2, #14
; CHECK-GI-NEXT:    mov w9, #5243 // =0x147b
; CHECK-GI-NEXT:    mul w8, w8, w9
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    lsr w8, w8, #16
; CHECK-GI-NEXT:    lsr w8, w8, #1
; CHECK-GI-NEXT:    msub w0, w8, w9, w0
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i16 %a, 100
  ret i16 %s
}

; --- Scalar i32 --------------------------------------------------------------

; Signed i32 remainder by 7.
define i32 @si32_7(i32 %a, i32 %b) {
; CHECK-SD-LABEL: si32_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w8, #9363 // =0x2493
; CHECK-SD-NEXT:    movk w8, #37449, lsl #16
; CHECK-SD-NEXT:    smull x8, w0, w8
; CHECK-SD-NEXT:    lsr x8, x8, #32
; CHECK-SD-NEXT:    add w8, w8, w0
; CHECK-SD-NEXT:    asr w9, w8, #2
; CHECK-SD-NEXT:    add w8, w9, w8, lsr #31
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    add w0, w0, w8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si32_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #9363 // =0x2493
; CHECK-GI-NEXT:    movk w8, #37449, lsl #16
; CHECK-GI-NEXT:    smull x8, w0, w8
; CHECK-GI-NEXT:    asr x8, x8, #32
; CHECK-GI-NEXT:    add w8, w8, w0
; CHECK-GI-NEXT:    asr w8, w8, #2
; CHECK-GI-NEXT:    add w8, w8, w8, lsr #31
; CHECK-GI-NEXT:    lsl w9, w8, #3
; CHECK-GI-NEXT:    sub w8, w9, w8
; CHECK-GI-NEXT:    sub w0, w0, w8
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i32 %a, 7
  ret i32 %s
}

; Signed i32 remainder by 100.
define i32 @si32_100(i32 %a, i32 %b) {
; CHECK-SD-LABEL: si32_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
; CHECK-SD-NEXT:    mov w9, #100 // =0x64
; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
; CHECK-SD-NEXT:    smull x8, w0, w8
; CHECK-SD-NEXT:    asr x8, x8, #37
; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
; CHECK-SD-NEXT:    msub w0, w8, w9, w0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si32_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #34079 // =0x851f
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    movk w8, #20971, lsl #16
; CHECK-GI-NEXT:    smull x8, w0, w8
; CHECK-GI-NEXT:    asr x8, x8, #32
; CHECK-GI-NEXT:    asr w8, w8, #5
; CHECK-GI-NEXT:    add w8, w8, w8, lsr #31
; CHECK-GI-NEXT:    msub w0, w8, w9, w0
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i32 %a, 100
  ret i32 %s
}

; Unsigned i32 remainder by 7.
define i32 @ui32_7(i32 %a, i32 %b) {
; CHECK-SD-LABEL: ui32_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
; CHECK-SD-NEXT:    umull x8, w0, w8
; CHECK-SD-NEXT:    lsr x8, x8, #32
; CHECK-SD-NEXT:    sub w9, w0, w8
; CHECK-SD-NEXT:    add w8, w8, w9, lsr #1
; CHECK-SD-NEXT:    lsr w8, w8, #2
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    add w0, w0, w8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui32_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #18725 // =0x4925
; CHECK-GI-NEXT:    movk w8, #9362, lsl #16
; CHECK-GI-NEXT:    umull x8, w0, w8
; CHECK-GI-NEXT:    lsr x8, x8, #32
; CHECK-GI-NEXT:    sub w9, w0, w8
; CHECK-GI-NEXT:    add w8, w8, w9, lsr #1
; CHECK-GI-NEXT:    lsr w8, w8, #2
; CHECK-GI-NEXT:    lsl w9, w8, #3
; CHECK-GI-NEXT:    sub w8, w9, w8
; CHECK-GI-NEXT:    sub w0, w0, w8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i32 %a, 7
  ret i32 %s
}

; Unsigned i32 remainder by 100.
define i32 @ui32_100(i32 %a, i32 %b) {
; CHECK-SD-LABEL: ui32_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
; CHECK-SD-NEXT:    mov w9, #100 // =0x64
; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
; CHECK-SD-NEXT:    umull x8, w0, w8
; CHECK-SD-NEXT:    lsr x8, x8, #37
; CHECK-SD-NEXT:    msub w0, w8, w9, w0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui32_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #34079 // =0x851f
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    movk w8, #20971, lsl #16
; CHECK-GI-NEXT:    umull x8, w0, w8
; CHECK-GI-NEXT:    lsr x8, x8, #32
; CHECK-GI-NEXT:    lsr w8, w8, #5
; CHECK-GI-NEXT:    msub w0, w8, w9, w0
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i32 %a, 100
  ret i32 %s
}

; --- Scalar i64 --------------------------------------------------------------

; Signed i64 remainder by 7.
define i64 @si64_7(i64 %a, i64 %b) {
; CHECK-SD-LABEL: si64_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov x8, #18725 // =0x4925
; CHECK-SD-NEXT:    movk x8, #9362, lsl #16
; CHECK-SD-NEXT:    movk x8, #37449, lsl #32
; CHECK-SD-NEXT:    movk x8, #18724, lsl #48
; CHECK-SD-NEXT:    smulh x8, x0, x8
; CHECK-SD-NEXT:    asr x9, x8, #1
; CHECK-SD-NEXT:    add x8, x9, x8, lsr #63
; CHECK-SD-NEXT:    sub x8, x8, x8, lsl #3
; CHECK-SD-NEXT:    add x0, x0, x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si64_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov x8, #18725 // =0x4925
; CHECK-GI-NEXT:    movk x8, #9362, lsl #16
; CHECK-GI-NEXT:    movk x8, #37449, lsl #32
; CHECK-GI-NEXT:    movk x8, #18724, lsl #48
; CHECK-GI-NEXT:    smulh x8, x0, x8
; CHECK-GI-NEXT:    asr x8, x8, #1
; CHECK-GI-NEXT:    add x8, x8, x8, lsr #63
; CHECK-GI-NEXT:    lsl x9, x8, #3
; CHECK-GI-NEXT:    sub x8, x9, x8
; CHECK-GI-NEXT:    sub x0, x0, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i64 %a, 7
  ret i64 %s
}

; Signed i64 remainder by 100.
define i64 @si64_100(i64 %a, i64 %b) {
; CHECK-SD-LABEL: si64_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov x8, #55051 // =0xd70b
; CHECK-SD-NEXT:    movk x8, #28835, lsl #16
; CHECK-SD-NEXT:    movk x8, #2621, lsl #32
; CHECK-SD-NEXT:    movk x8, #41943, lsl #48
; CHECK-SD-NEXT:    smulh x8, x0, x8
; CHECK-SD-NEXT:    add x8, x8, x0
; CHECK-SD-NEXT:    asr x9, x8, #6
; CHECK-SD-NEXT:    add x8, x9, x8, lsr #63
; CHECK-SD-NEXT:    mov w9, #100 // =0x64
; CHECK-SD-NEXT:    msub x0, x8, x9, x0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: si64_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov x8, #55051 // =0xd70b
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    movk x8, #28835, lsl #16
; CHECK-GI-NEXT:    movk x8, #2621, lsl #32
; CHECK-GI-NEXT:    movk x8, #41943, lsl #48
; CHECK-GI-NEXT:    smulh x8, x0, x8
; CHECK-GI-NEXT:    add x8, x8, x0
; CHECK-GI-NEXT:    asr x8, x8, #6
; CHECK-GI-NEXT:    add x8, x8, x8, lsr #63
; CHECK-GI-NEXT:    msub x0, x8, x9, x0
; CHECK-GI-NEXT:    ret
entry:
  %s = srem i64 %a, 100
  ret i64 %s
}

; Unsigned i64 remainder by 7.
define i64 @ui64_7(i64 %a, i64 %b) {
; CHECK-SD-LABEL: ui64_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov x8, #9363 // =0x2493
; CHECK-SD-NEXT:    movk x8, #37449, lsl #16
; CHECK-SD-NEXT:    movk x8, #18724, lsl #32
; CHECK-SD-NEXT:    movk x8, #9362, lsl #48
; CHECK-SD-NEXT:    umulh x8, x0, x8
; CHECK-SD-NEXT:    sub x9, x0, x8
; CHECK-SD-NEXT:    add x8, x8, x9, lsr #1
; CHECK-SD-NEXT:    lsr x8, x8, #2
; CHECK-SD-NEXT:    sub x8, x8, x8, lsl #3
; CHECK-SD-NEXT:    add x0, x0, x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui64_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov x8, #9363 // =0x2493
; CHECK-GI-NEXT:    movk x8, #37449, lsl #16
; CHECK-GI-NEXT:    movk x8, #18724, lsl #32
; CHECK-GI-NEXT:    movk x8, #9362, lsl #48
; CHECK-GI-NEXT:    umulh x8, x0, x8
; CHECK-GI-NEXT:    sub x9, x0, x8
; CHECK-GI-NEXT:    add x8, x8, x9, lsr #1
; CHECK-GI-NEXT:    lsr x8, x8, #2
; CHECK-GI-NEXT:    lsl x9, x8, #3
; CHECK-GI-NEXT:    sub x8, x9, x8
; CHECK-GI-NEXT:    sub x0, x0, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i64 %a, 7
  ret i64 %s
}

; Unsigned i64 remainder by 100. Both runs emit the same code, so the
; assertions use the shared prefix.
define i64 @ui64_100(i64 %a, i64 %b) {
; CHECK-LABEL: ui64_100:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov x9, #62915 // =0xf5c3
; CHECK-NEXT:    lsr x8, x0, #2
; CHECK-NEXT:    movk x9, #23592, lsl #16
; CHECK-NEXT:    movk x9, #49807, lsl #32
; CHECK-NEXT:    movk x9, #10485, lsl #48
; CHECK-NEXT:    umulh x8, x8, x9
; CHECK-NEXT:    mov w9, #100 // =0x64
; CHECK-NEXT:    lsr x8, x8, #2
; CHECK-NEXT:    msub x0, x8, x9, x0
; CHECK-NEXT:    ret
entry:
  %s = urem i64 %a, 100
  ret i64 %s
}

; --- Scalar i128 -------------------------------------------------------------
; The signed cases call __modti3 in both runs. The unsigned cases call
; __umodti3 only in the global-isel=0 run; the global-isel=1 run emits an
; inline multiply sequence with no call.

; Signed i128 remainder by 7.
define i128 @si128_7(i128 %a, i128 %b) {
; CHECK-LABEL: si128_7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    mov w2, #7 // =0x7
; CHECK-NEXT:    mov x3, xzr
; CHECK-NEXT:    bl __modti3
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %s = srem i128 %a, 7
  ret i128 %s
}

; Signed i128 remainder by 100.
define i128 @si128_100(i128 %a, i128 %b) {
; CHECK-LABEL: si128_100:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    mov w2, #100 // =0x64
; CHECK-NEXT:    mov x3, xzr
; CHECK-NEXT:    bl __modti3
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %s = srem i128 %a, 100
  ret i128 %s
}

; Unsigned i128 remainder by 7.
define i128 @ui128_7(i128 %a, i128 %b) {
; CHECK-SD-LABEL: ui128_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    mov w2, #7 // =0x7
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui128_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov x8, #18725 // =0x4925
; CHECK-GI-NEXT:    mov x10, #9362 // =0x2492
; CHECK-GI-NEXT:    movk x8, #9362, lsl #16
; CHECK-GI-NEXT:    movk x10, #37449, lsl #16
; CHECK-GI-NEXT:    movk x8, #37449, lsl #32
; CHECK-GI-NEXT:    movk x10, #18724, lsl #32
; CHECK-GI-NEXT:    movk x8, #18724, lsl #48
; CHECK-GI-NEXT:    movk x10, #9362, lsl #48
; CHECK-GI-NEXT:    mul x9, x1, x8
; CHECK-GI-NEXT:    mul x11, x0, x10
; CHECK-GI-NEXT:    umulh x12, x0, x8
; CHECK-GI-NEXT:    mul x13, x1, x10
; CHECK-GI-NEXT:    adds x9, x9, x11
; CHECK-GI-NEXT:    umulh x14, x1, x8
; CHECK-GI-NEXT:    cset w11, hs
; CHECK-GI-NEXT:    cmn x9, x12
; CHECK-GI-NEXT:    and x9, x11, #0x1
; CHECK-GI-NEXT:    sub x12, x0, x0
; CHECK-GI-NEXT:    umulh x15, x0, x10
; CHECK-GI-NEXT:    cset w11, hs
; CHECK-GI-NEXT:    and x11, x11, #0x1
; CHECK-GI-NEXT:    add x12, x13, x12
; CHECK-GI-NEXT:    and x13, xzr, #0x1
; CHECK-GI-NEXT:    umulh x8, xzr, x8
; CHECK-GI-NEXT:    add x9, x9, x11
; CHECK-GI-NEXT:    and x11, xzr, #0x1
; CHECK-GI-NEXT:    adds x12, x12, x14
; CHECK-GI-NEXT:    add x11, x11, x13
; CHECK-GI-NEXT:    umulh x10, x1, x10
; CHECK-GI-NEXT:    cset w13, hs
; CHECK-GI-NEXT:    adds x12, x12, x15
; CHECK-GI-NEXT:    and x13, x13, #0x1
; CHECK-GI-NEXT:    umulh x14, x0, xzr
; CHECK-GI-NEXT:    cset w15, hs
; CHECK-GI-NEXT:    adds x9, x12, x9
; CHECK-GI-NEXT:    add x11, x11, x13
; CHECK-GI-NEXT:    and x12, x15, #0x1
; CHECK-GI-NEXT:    cset w13, hs
; CHECK-GI-NEXT:    add x11, x11, x12
; CHECK-GI-NEXT:    and x12, x13, #0x1
; CHECK-GI-NEXT:    add x8, x8, x10
; CHECK-GI-NEXT:    add x10, x11, x12
; CHECK-GI-NEXT:    add x8, x8, x14
; CHECK-GI-NEXT:    add x8, x8, x10
; CHECK-GI-NEXT:    subs x10, x0, x9
; CHECK-GI-NEXT:    sbc x11, x1, x8
; CHECK-GI-NEXT:    lsl x12, x11, #63
; CHECK-GI-NEXT:    lsr x11, x11, #1
; CHECK-GI-NEXT:    orr x10, x12, x10, lsr #1
; CHECK-GI-NEXT:    adds x9, x10, x9
; CHECK-GI-NEXT:    adc x8, x11, x8
; CHECK-GI-NEXT:    lsl x10, x8, #62
; CHECK-GI-NEXT:    lsr x8, x8, #2
; CHECK-GI-NEXT:    orr x9, x10, x9, lsr #2
; CHECK-GI-NEXT:    mov w10, #7 // =0x7
; CHECK-GI-NEXT:    lsl x12, x8, #3
; CHECK-GI-NEXT:    umulh x10, x9, x10
; CHECK-GI-NEXT:    lsl x11, x9, #3
; CHECK-GI-NEXT:    sub x8, x12, x8
; CHECK-GI-NEXT:    sub x9, x11, x9
; CHECK-GI-NEXT:    subs x0, x0, x9
; CHECK-GI-NEXT:    add x8, x8, x10
; CHECK-GI-NEXT:    sbc x1, x1, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i128 %a, 7
  ret i128 %s
}

; Unsigned i128 remainder by 100.
define i128 @ui128_100(i128 %a, i128 %b) {
; CHECK-SD-LABEL: ui128_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    mov w2, #100 // =0x64
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ui128_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov x8, #23593 // =0x5c29
; CHECK-GI-NEXT:    mov x10, #62914 // =0xf5c2
; CHECK-GI-NEXT:    movk x8, #49807, lsl #16
; CHECK-GI-NEXT:    movk x10, #23592, lsl #16
; CHECK-GI-NEXT:    movk x8, #10485, lsl #32
; CHECK-GI-NEXT:    movk x10, #49807, lsl #32
; CHECK-GI-NEXT:    movk x8, #36700, lsl #48
; CHECK-GI-NEXT:    movk x10, #10485, lsl #48
; CHECK-GI-NEXT:    mul x9, x1, x8
; CHECK-GI-NEXT:    mul x11, x0, x10
; CHECK-GI-NEXT:    umulh x12, x0, x8
; CHECK-GI-NEXT:    mul x13, x1, x10
; CHECK-GI-NEXT:    adds x9, x9, x11
; CHECK-GI-NEXT:    umulh x14, x1, x8
; CHECK-GI-NEXT:    cset w11, hs
; CHECK-GI-NEXT:    cmn x9, x12
; CHECK-GI-NEXT:    and x9, x11, #0x1
; CHECK-GI-NEXT:    sub x12, x0, x0
; CHECK-GI-NEXT:    umulh x15, x0, x10
; CHECK-GI-NEXT:    cset w11, hs
; CHECK-GI-NEXT:    and x11, x11, #0x1
; CHECK-GI-NEXT:    add x12, x13, x12
; CHECK-GI-NEXT:    and x13, xzr, #0x1
; CHECK-GI-NEXT:    umulh x8, xzr, x8
; CHECK-GI-NEXT:    add x9, x9, x11
; CHECK-GI-NEXT:    and x11, xzr, #0x1
; CHECK-GI-NEXT:    adds x12, x12, x14
; CHECK-GI-NEXT:    add x11, x11, x13
; CHECK-GI-NEXT:    umulh x10, x1, x10
; CHECK-GI-NEXT:    cset w13, hs
; CHECK-GI-NEXT:    adds x12, x12, x15
; CHECK-GI-NEXT:    and x13, x13, #0x1
; CHECK-GI-NEXT:    umulh x14, x0, xzr
; CHECK-GI-NEXT:    cset w15, hs
; CHECK-GI-NEXT:    adds x9, x12, x9
; CHECK-GI-NEXT:    add x11, x11, x13
; CHECK-GI-NEXT:    and x12, x15, #0x1
; CHECK-GI-NEXT:    cset w13, hs
; CHECK-GI-NEXT:    add x11, x11, x12
; CHECK-GI-NEXT:    and x12, x13, #0x1
; CHECK-GI-NEXT:    add x8, x8, x10
; CHECK-GI-NEXT:    add x10, x11, x12
; CHECK-GI-NEXT:    add x8, x8, x14
; CHECK-GI-NEXT:    add x8, x8, x10
; CHECK-GI-NEXT:    lsl x10, x8, #60
; CHECK-GI-NEXT:    lsr x8, x8, #4
; CHECK-GI-NEXT:    orr x9, x10, x9, lsr #4
; CHECK-GI-NEXT:    mov w10, #100 // =0x64
; CHECK-GI-NEXT:    umulh x11, x9, x10
; CHECK-GI-NEXT:    mul x9, x9, x10
; CHECK-GI-NEXT:    madd x8, x8, x10, x11
; CHECK-GI-NEXT:    subs x0, x0, x9
; CHECK-GI-NEXT:    sbc x1, x1, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem i128 %a, 100
  ret i128 %s
}

; --- Signed i8 vectors -------------------------------------------------------
; The divisor is a splat of 7 or 100 across every lane.

; Signed <2 x i8> remainder by splat 7.
define <2 x i8> @sv2i8_7(<2 x i8> %d, <2 x i8> %e) {
; CHECK-SD-LABEL: sv2i8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v1.2s, v0.2s, #24
; CHECK-SD-NEXT:    mov w8, #9363 // =0x2493
; CHECK-SD-NEXT:    movi v3.2s, #7
; CHECK-SD-NEXT:    movk w8, #37449, lsl #16
; CHECK-SD-NEXT:    dup v2.2s, w8
; CHECK-SD-NEXT:    sshr v0.2s, v1.2s, #24
; CHECK-SD-NEXT:    smull v2.2d, v0.2s, v2.2s
; CHECK-SD-NEXT:    shrn v2.2s, v2.2d, #32
; CHECK-SD-NEXT:    ssra v2.2s, v1.2s, #24
; CHECK-SD-NEXT:    sshr v1.2s, v2.2s, #2
; CHECK-SD-NEXT:    usra v1.2s, v2.2s, #31
; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v3.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv2i8_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #65427 // =0xff93
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w8
; CHECK-GI-NEXT:    shl v1.4h, v1.4h, #8
; CHECK-GI-NEXT:    sshr v1.4h, v1.4h, #8
; CHECK-GI-NEXT:    smov w8, v1.h[0]
; CHECK-GI-NEXT:    smov w9, v1.h[1]
; CHECK-GI-NEXT:    shl v1.2s, v0.2s, #24
; CHECK-GI-NEXT:    sshr v1.2s, v1.2s, #24
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    mov w8, #8 // =0x8
; CHECK-GI-NEXT:    mov v2.s[1], w9
; CHECK-GI-NEXT:    mul v1.2s, v1.2s, v2.2s
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    mov v2.h[1], w8
; CHECK-GI-NEXT:    mov w8, #2 // =0x2
; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT:    neg v2.4h, v2.4h
; CHECK-GI-NEXT:    sshl v1.4h, v1.4h, v2.4h
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT:    mov v2.b[1], w8
; CHECK-GI-NEXT:    mov w8, #7 // =0x7
; CHECK-GI-NEXT:    fmov s3, w8
; CHECK-GI-NEXT:    add v1.2s, v1.2s, v0.2s
; CHECK-GI-NEXT:    mov v3.b[1], w8
; CHECK-GI-NEXT:    neg v2.8b, v2.8b
; CHECK-GI-NEXT:    mov w9, v1.s[1]
; CHECK-GI-NEXT:    mov v1.b[1], w9
; CHECK-GI-NEXT:    sshl v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT:    neg v2.8b, v3.8b
; CHECK-GI-NEXT:    movi v3.2s, #7
; CHECK-GI-NEXT:    ushl v2.8b, v1.8b, v2.8b
; CHECK-GI-NEXT:    umov w8, v1.b[0]
; CHECK-GI-NEXT:    umov w10, v1.b[1]
; CHECK-GI-NEXT:    umov w9, v2.b[0]
; CHECK-GI-NEXT:    umov w11, v2.b[1]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    fmov s2, w9
; CHECK-GI-NEXT:    mov v1.s[1], w10
; CHECK-GI-NEXT:    mov v2.s[1], w11
; CHECK-GI-NEXT:    add v1.2s, v1.2s, v2.2s
; CHECK-GI-NEXT:    mls v0.2s, v1.2s, v3.2s
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <2 x i8> %d, <i8 7, i8 7>
  ret <2 x i8> %s
}

; Signed <2 x i8> remainder by splat 100.
define <2 x i8> @sv2i8_100(<2 x i8> %d, <2 x i8> %e) {
; CHECK-SD-LABEL: sv2i8_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
; CHECK-SD-NEXT:    movi v2.2s, #100
; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
; CHECK-SD-NEXT:    dup v1.2s, w8
; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    smull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT:    sshr v1.2d, v1.2d, #37
; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
; CHECK-SD-NEXT:    usra v1.2s, v1.2s, #31
; CHECK-SD-NEXT:    mls v0.2s, v1.2s, v2.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv2i8_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #41 // =0x29
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w8
; CHECK-GI-NEXT:    shl v1.4h, v1.4h, #8
; CHECK-GI-NEXT:    sshr v1.4h, v1.4h, #8
; CHECK-GI-NEXT:    smov w8, v1.h[0]
; CHECK-GI-NEXT:    smov w9, v1.h[1]
; CHECK-GI-NEXT:    shl v1.2s, v0.2s, #24
; CHECK-GI-NEXT:    sshr v1.2s, v1.2s, #24
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    mov w8, #8 // =0x8
; CHECK-GI-NEXT:    mov v2.s[1], w9
; CHECK-GI-NEXT:    mul v1.2s, v1.2s, v2.2s
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    mov v2.h[1], w8
; CHECK-GI-NEXT:    mov w8, #4 // =0x4
; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT:    fmov s3, w8
; CHECK-GI-NEXT:    neg v2.4h, v2.4h
; CHECK-GI-NEXT:    mov v3.b[1], w8
; CHECK-GI-NEXT:    mov w8, #7 // =0x7
; CHECK-GI-NEXT:    sshl v1.4h, v1.4h, v2.4h
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    neg v3.8b, v3.8b
; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    mov v2.b[1], w8
; CHECK-GI-NEXT:    sshl v1.8b, v1.8b, v3.8b
; CHECK-GI-NEXT:    neg v2.8b, v2.8b
; CHECK-GI-NEXT:    movi v3.2s, #100
; CHECK-GI-NEXT:    ushl v2.8b, v1.8b, v2.8b
; CHECK-GI-NEXT:    umov w8, v1.b[0]
; CHECK-GI-NEXT:    umov w10, v1.b[1]
; CHECK-GI-NEXT:    umov w9, v2.b[0]
; CHECK-GI-NEXT:    umov w11, v2.b[1]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    fmov s2, w9
; CHECK-GI-NEXT:    mov v1.s[1], w10
; CHECK-GI-NEXT:    mov v2.s[1], w11
; CHECK-GI-NEXT:    add v1.2s, v1.2s, v2.2s
; CHECK-GI-NEXT:    mls v0.2s, v1.2s, v3.2s
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <2 x i8> %d, <i8 100, i8 100>
  ret <2 x i8> %s
}

; Signed <3 x i8> remainder by splat 7. The global-isel=1 run emits one sdiv
; per lane here.
define <3 x i8> @sv3i8_7(<3 x i8> %d, <3 x i8> %e) {
; CHECK-SD-LABEL: sv3i8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    sxtb x8, w0
; CHECK-SD-NEXT:    mov x9, #-56173 // =0xffffffffffff2493
; CHECK-SD-NEXT:    sxtb x10, w1
; CHECK-SD-NEXT:    sxtb x11, w2
; CHECK-SD-NEXT:    movk x9, #37449, lsl #16
; CHECK-SD-NEXT:    sxtb w12, w1
; CHECK-SD-NEXT:    smull x8, w8, w9
; CHECK-SD-NEXT:    sxtb w13, w0
; CHECK-SD-NEXT:    smull x10, w10, w9
; CHECK-SD-NEXT:    smull x9, w11, w9
; CHECK-SD-NEXT:    sxtb w11, w2
; CHECK-SD-NEXT:    lsr x8, x8, #32
; CHECK-SD-NEXT:    lsr x10, x10, #32
; CHECK-SD-NEXT:    lsr x9, x9, #32
; CHECK-SD-NEXT:    add w8, w8, w13
; CHECK-SD-NEXT:    add w10, w10, w12
; CHECK-SD-NEXT:    asr w14, w8, #2
; CHECK-SD-NEXT:    add w9, w9, w11
; CHECK-SD-NEXT:    asr w15, w10, #2
; CHECK-SD-NEXT:    asr w16, w9, #2
; CHECK-SD-NEXT:    add w8, w14, w8, lsr #31
; CHECK-SD-NEXT:    add w10, w15, w10, lsr #31
; CHECK-SD-NEXT:    add w9, w16, w9, lsr #31
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    sub w10, w10, w10, lsl #3
; CHECK-SD-NEXT:    sub w9, w9, w9, lsl #3
; CHECK-SD-NEXT:    add w0, w13, w8
; CHECK-SD-NEXT:    add w1, w12, w10
; CHECK-SD-NEXT:    add w2, w11, w9
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv3i8_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxtb w8, w0
; CHECK-GI-NEXT:    sxtb w11, w1
; CHECK-GI-NEXT:    sxtb w13, w2
; CHECK-GI-NEXT:    mov w9, #7 // =0x7
; CHECK-GI-NEXT:    sdiv w10, w8, w9
; CHECK-GI-NEXT:    sdiv w12, w11, w9
; CHECK-GI-NEXT:    lsl w14, w10, #3
; CHECK-GI-NEXT:    sub w10, w14, w10
; CHECK-GI-NEXT:    sub w0, w8, w10
; CHECK-GI-NEXT:    sdiv w9, w13, w9
; CHECK-GI-NEXT:    lsl w15, w12, #3
; CHECK-GI-NEXT:    sub w12, w15, w12
; CHECK-GI-NEXT:    sub w1, w11, w12
; CHECK-GI-NEXT:    lsl w16, w9, #3
; CHECK-GI-NEXT:    sub w9, w16, w9
; CHECK-GI-NEXT:    sub w2, w13, w9
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <3 x i8> %d, <i8 7, i8 7, i8 7>
  ret <3 x i8> %s
}

; Signed <3 x i8> remainder by splat 100.
define <3 x i8> @sv3i8_100(<3 x i8> %d, <3 x i8> %e) {
; CHECK-SD-LABEL: sv3i8_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    sxtb x8, w0
; CHECK-SD-NEXT:    mov w9, #34079 // =0x851f
; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT:    sxtb x10, w1
; CHECK-SD-NEXT:    movk w9, #20971, lsl #16
; CHECK-SD-NEXT:    sxtb x11, w2
; CHECK-SD-NEXT:    sxtb w12, w0
; CHECK-SD-NEXT:    smull x8, w8, w9
; CHECK-SD-NEXT:    smull x10, w10, w9
; CHECK-SD-NEXT:    smull x9, w11, w9
; CHECK-SD-NEXT:    mov w11, #100 // =0x64
; CHECK-SD-NEXT:    asr x8, x8, #37
; CHECK-SD-NEXT:    asr x10, x10, #37
; CHECK-SD-NEXT:    asr x9, x9, #37
; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
; CHECK-SD-NEXT:    add w10, w10, w10, lsr #31
; CHECK-SD-NEXT:    add w9, w9, w9, lsr #31
; CHECK-SD-NEXT:    msub w0, w8, w11, w12
; CHECK-SD-NEXT:    sxtb w8, w1
; CHECK-SD-NEXT:    msub w1, w10, w11, w8
; CHECK-SD-NEXT:    sxtb w8, w2
; CHECK-SD-NEXT:    msub w2, w9, w11, w8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv3i8_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxtb w8, w0
; CHECK-GI-NEXT:    sxtb w11, w1
; CHECK-GI-NEXT:    sxtb w13, w2
; CHECK-GI-NEXT:    mov w9, #100 // =0x64
; CHECK-GI-NEXT:    sdiv w10, w8, w9
; CHECK-GI-NEXT:    sdiv w12, w11, w9
; CHECK-GI-NEXT:    msub w0, w10, w9, w8
; CHECK-GI-NEXT:    sdiv w14, w13, w9
; CHECK-GI-NEXT:    msub w1, w12, w9, w11
; CHECK-GI-NEXT:    msub w2, w14, w9, w13
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <3 x i8> %d, <i8 100, i8 100, i8 100>
  ret <3 x i8> %s
}

; Signed <4 x i8> remainder by splat 7.
define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: sv4i8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    mov x8, #-56173 // =0xffffffffffff2493
; CHECK-SD-NEXT:    movk x8, #37449, lsl #16
; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    smov x9, v0.h[0]
; CHECK-SD-NEXT:    smov x10, v0.h[1]
; CHECK-SD-NEXT:    smov w11, v0.h[0]
; CHECK-SD-NEXT:    smov x12, v0.h[2]
; CHECK-SD-NEXT:    smov w13, v0.h[1]
; CHECK-SD-NEXT:    smov x14, v0.h[3]
; CHECK-SD-NEXT:    smov w16, v0.h[2]
; CHECK-SD-NEXT:    smull x9, w9, w8
; CHECK-SD-NEXT:    smull x10, w10, w8
; CHECK-SD-NEXT:    smull x12, w12, w8
; CHECK-SD-NEXT:    lsr x9, x9, #32
; CHECK-SD-NEXT:    smull x8, w14, w8
; CHECK-SD-NEXT:    smov w14, v0.h[3]
; CHECK-SD-NEXT:    lsr x10, x10, #32
; CHECK-SD-NEXT:    add w9, w9, w11
; CHECK-SD-NEXT:    lsr x12, x12, #32
; CHECK-SD-NEXT:    asr w15, w9, #2
; CHECK-SD-NEXT:    add w10, w10, w13
; CHECK-SD-NEXT:    lsr x8, x8, #32
; CHECK-SD-NEXT:    asr w17, w10, #2
; CHECK-SD-NEXT:    add w12, w12, w16
; CHECK-SD-NEXT:    add w9, w15, w9, lsr #31
; CHECK-SD-NEXT:    asr w15, w12, #2
; CHECK-SD-NEXT:    add w8, w8, w14
; CHECK-SD-NEXT:    add w10, w17, w10, lsr #31
; CHECK-SD-NEXT:    sub w9, w9, w9, lsl #3
; CHECK-SD-NEXT:    sub w10, w10, w10, lsl #3
; CHECK-SD-NEXT:    add w9, w11, w9
; CHECK-SD-NEXT:    fmov s0, w9
; CHECK-SD-NEXT:    add w10, w13, w10
; CHECK-SD-NEXT:    add w9, w15, w12, lsr #31
; CHECK-SD-NEXT:    sub w9, w9, w9, lsl #3
; CHECK-SD-NEXT:    mov v0.h[1], w10
; CHECK-SD-NEXT:    asr w10, w8, #2
; CHECK-SD-NEXT:    add w9, w16, w9
; CHECK-SD-NEXT:    add w8, w10, w8, lsr #31
; CHECK-SD-NEXT:    mov v0.h[2], w9
; CHECK-SD-NEXT:    sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT:    add w8, w14, w8
; CHECK-SD-NEXT:    mov v0.h[3], w8
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv4i8_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #147 // =0x93
; CHECK-GI-NEXT:    shl v2.4h, v0.4h, #8
; CHECK-GI-NEXT:    mov w9, #7 // =0x7
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    fmov s4, w9
; CHECK-GI-NEXT:    sshr v2.4h, v2.4h, #8
; CHECK-GI-NEXT:    mov v1.b[1], w8
; CHECK-GI-NEXT:    mov v4.b[1], w9
; CHECK-GI-NEXT:    mov v1.b[2], w8
; CHECK-GI-NEXT:    mov v4.b[2], w9
; CHECK-GI-NEXT:    mov v1.b[3], w8
; CHECK-GI-NEXT:    mov w8, #2 // =0x2
; CHECK-GI-NEXT:    mov v4.b[3], w9
; CHECK-GI-NEXT:    fmov s3, w8
; CHECK-GI-NEXT:    mov v3.b[1], w8
; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    mul v1.4h, v2.4h, v1.4h
; CHECK-GI-NEXT:    fmov d2, d0
; CHECK-GI-NEXT:    mov v3.b[2], w8
; CHECK-GI-NEXT:    ssra v2.4h, v1.4h, #8
; CHECK-GI-NEXT:    mov v3.b[3], w8
; CHECK-GI-NEXT:    uzp1 v1.8b, v2.8b, v0.8b
; CHECK-GI-NEXT:    neg v2.8b, v3.8b
; CHECK-GI-NEXT:    dup v3.4h, w9
; CHECK-GI-NEXT:    sshl v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT:    neg v2.8b, v4.8b
; CHECK-GI-NEXT:    ushl v2.8b, v1.8b, v2.8b
; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    ushll v2.8h, v2.8b, #0
; CHECK-GI-NEXT:    add v1.4h, v1.4h, v2.4h
; CHECK-GI-NEXT:    mls v0.4h, v1.4h, v3.4h
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <4 x i8> %d, <i8 7, i8 7, i8 7, i8 7>
  ret <4 x i8> %s
}

; Signed <4 x i8> remainder by splat 100.
define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: sv4i8_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    mov w8, #34079 // =0x851f
; CHECK-SD-NEXT:    mov w14, #100 // =0x64
; CHECK-SD-NEXT:    movk w8, #20971, lsl #16
; CHECK-SD-NEXT:    sshr v1.4h, v0.4h, #8
; CHECK-SD-NEXT:    smov x9, v1.h[0]
; CHECK-SD-NEXT:    smov x10, v1.h[1]
; CHECK-SD-NEXT:    smov x11, v1.h[2]
; CHECK-SD-NEXT:    smov w12, v1.h[0]
; CHECK-SD-NEXT:    smov x13, v1.h[3]
; CHECK-SD-NEXT:    smov w15, v1.h[1]
; CHECK-SD-NEXT:    smull x9, w9, w8
; CHECK-SD-NEXT:    smull x10, w10, w8
; CHECK-SD-NEXT:    smull x11, w11, w8
; CHECK-SD-NEXT:    asr x9, x9, #37
; CHECK-SD-NEXT:    smull x8, w13, w8
; CHECK-SD-NEXT:    asr x10, x10, #37
; CHECK-SD-NEXT:    add w9, w9, w9, lsr #31
; CHECK-SD-NEXT:    asr x11, x11, #37
; CHECK-SD-NEXT:    add w10, w10, w10, lsr #31
; CHECK-SD-NEXT:    asr x8, x8, #37
; CHECK-SD-NEXT:    msub w9, w9, w14, w12
; CHECK-SD-NEXT:    msub w10, w10, w14, w15
; CHECK-SD-NEXT:    add w8, w8, w8, lsr #31
; CHECK-SD-NEXT:    fmov s0, w9
; CHECK-SD-NEXT:    add w9, w11, w11, lsr #31
; CHECK-SD-NEXT:    smov w11, v1.h[2]
; CHECK-SD-NEXT:    msub w9, w9, w14, w11
; CHECK-SD-NEXT:    mov v0.h[1], w10
; CHECK-SD-NEXT:    smov w10, v1.h[3]
; CHECK-SD-NEXT:    msub w8, w8, w14, w10
; CHECK-SD-NEXT:    mov v0.h[2], w9
; CHECK-SD-NEXT:    mov v0.h[3], w8
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv4i8_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, #41 // =0x29
; CHECK-GI-NEXT:    shl v2.4h, v0.4h, #8
; CHECK-GI-NEXT:    mov w9, #7 // =0x7
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    fmov s4, w9
; CHECK-GI-NEXT:    sshr v2.4h, v2.4h, #8
; CHECK-GI-NEXT:    mov v1.b[1], w8
; CHECK-GI-NEXT:    mov v4.b[1], w9
; CHECK-GI-NEXT:    mov v1.b[2], w8
; CHECK-GI-NEXT:    mov v4.b[2], w9
; CHECK-GI-NEXT:    mov v1.b[3], w8
; CHECK-GI-NEXT:    mov w8, #4 // =0x4
; CHECK-GI-NEXT:    mov v4.b[3], w9
; CHECK-GI-NEXT:    fmov s3, w8
; CHECK-GI-NEXT:    mov v3.b[1], w8
; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    mul v1.4h, v2.4h, v1.4h
; CHECK-GI-NEXT:    mov v3.b[2], w8
; CHECK-GI-NEXT:    sshr v1.4h, v1.4h, #8
; CHECK-GI-NEXT:    mov v3.b[3], w8
; CHECK-GI-NEXT:    mov w8, #100 // =0x64
; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    neg v2.8b, v3.8b
; CHECK-GI-NEXT:    dup v3.4h, w8
; CHECK-GI-NEXT:    sshl v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT:    neg v2.8b, v4.8b
; CHECK-GI-NEXT:    ushl v2.8b, v1.8b, v2.8b
; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    ushll v2.8h, v2.8b, #0
; CHECK-GI-NEXT:    add v1.4h, v1.4h, v2.4h
; CHECK-GI-NEXT:    mls v0.4h, v1.4h, v3.4h
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <4 x i8> %d, <i8 100, i8 100, i8 100, i8 100>
  ret <4 x i8> %s
}

; Signed <8 x i8> remainder by splat 7.
define <8 x i8> @sv8i8_7(<8 x i8> %d, <8 x i8> %e) {
; CHECK-SD-LABEL: sv8i8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v1.8b, #147
; CHECK-SD-NEXT:    movi v2.8b, #7
; CHECK-SD-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    shrn v1.8b, v1.8h, #8
; CHECK-SD-NEXT:    add v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT:    sshr v1.8b, v1.8b, #2
; CHECK-SD-NEXT:    usra v1.8b, v1.8b, #7
; CHECK-SD-NEXT:    mls v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv8i8_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.8b, #147
; CHECK-GI-NEXT:    movi v3.8b, #7
; CHECK-GI-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-GI-NEXT:    shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT:    add v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    sshr v2.8b, v1.8b, #2
; CHECK-GI-NEXT:    ushr v2.8b, v2.8b, #7
; CHECK-GI-NEXT:    ssra v2.8b, v1.8b, #2
; CHECK-GI-NEXT:    mls v0.8b, v2.8b, v3.8b
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <8 x i8> %d, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <8 x i8> %s
}

; Signed <8 x i8> remainder by splat 100.
define <8 x i8> @sv8i8_100(<8 x i8> %d, <8 x i8> %e) {
; CHECK-SD-LABEL: sv8i8_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v1.8b, #41
; CHECK-SD-NEXT:    movi v2.8b, #100
; CHECK-SD-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    shrn v1.8b, v1.8h, #8
; CHECK-SD-NEXT:    sshr v1.8b, v1.8b, #4
; CHECK-SD-NEXT:    usra v1.8b, v1.8b, #7
; CHECK-SD-NEXT:    mls v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv8i8_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.8b, #41
; CHECK-GI-NEXT:    movi v3.8b, #100
; CHECK-GI-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-GI-NEXT:    shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT:    sshr v2.8b, v1.8b, #4
; CHECK-GI-NEXT:    ushr v2.8b, v2.8b, #7
; CHECK-GI-NEXT:    ssra v2.8b, v1.8b, #4
; CHECK-GI-NEXT:    mls v0.8b, v2.8b, v3.8b
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <8 x i8> %d, <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
  ret <8 x i8> %s
}

; Signed <16 x i8> remainder by splat 7.
define <16 x i8> @sv16i8_7(<16 x i8> %d, <16 x i8> %e) {
; CHECK-SD-LABEL: sv16i8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v1.16b, #147
; CHECK-SD-NEXT:    smull2 v2.8h, v0.16b, v1.16b
; CHECK-SD-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    uzp2 v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    movi v2.16b, #7
; CHECK-SD-NEXT:    add v1.16b, v1.16b, v0.16b
; CHECK-SD-NEXT:    sshr v1.16b, v1.16b, #2
; CHECK-SD-NEXT:    usra v1.16b, v1.16b, #7
; CHECK-SD-NEXT:    mls v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv16i8_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.16b, #147
; CHECK-GI-NEXT:    movi v3.16b, #7
; CHECK-GI-NEXT:    smull2 v2.8h, v0.16b, v1.16b
; CHECK-GI-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-GI-NEXT:    uzp2 v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    add v1.16b, v1.16b, v0.16b
; CHECK-GI-NEXT:    sshr v2.16b, v1.16b, #2
; CHECK-GI-NEXT:    ushr v2.16b, v2.16b, #7
; CHECK-GI-NEXT:    ssra v2.16b, v1.16b, #2
; CHECK-GI-NEXT:    mls v0.16b, v2.16b, v3.16b
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <16 x i8> %d, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %s
}

; Signed <16 x i8> remainder by splat 100.
define <16 x i8> @sv16i8_100(<16 x i8> %d, <16 x i8> %e) {
; CHECK-SD-LABEL: sv16i8_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v1.16b, #41
; CHECK-SD-NEXT:    smull2 v2.8h, v0.16b, v1.16b
; CHECK-SD-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    uzp2 v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    movi v2.16b, #100
; CHECK-SD-NEXT:    sshr v1.16b, v1.16b, #4
; CHECK-SD-NEXT:    usra v1.16b, v1.16b, #7
; CHECK-SD-NEXT:    mls v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv16i8_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.16b, #41
; CHECK-GI-NEXT:    movi v3.16b, #100
; CHECK-GI-NEXT:    smull2 v2.8h, v0.16b, v1.16b
; CHECK-GI-NEXT:    smull v1.8h, v0.8b, v1.8b
; CHECK-GI-NEXT:    uzp2 v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    sshr v2.16b, v1.16b, #4
; CHECK-GI-NEXT:    ushr v2.16b, v2.16b, #7
; CHECK-GI-NEXT:    ssra v2.16b, v1.16b, #4
; CHECK-GI-NEXT:    mls v0.16b, v2.16b, v3.16b
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <16 x i8> %d, <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
  ret <16 x i8> %s
}

define <2 x i8> @uv2i8_7(<2 x i8> %d, <2 x i8> %e) {
; CHECK-SD-LABEL: uv2i8_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
; CHECK-SD-NEXT:    mov w8, #18725 // =0x4925
; CHECK-SD-NEXT:    movk w8, #9362, lsl #16
; CHECK-SD-NEXT:    dup v2.2s, w8
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
;
CHECK-SD-NEXT: umull v1.2d, v0.2s, v2.2s ; CHECK-SD-NEXT: movi v2.2s, #7 ; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv2i8_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: movi d1, #0x0000ff000000ff ; CHECK-GI-NEXT: movi v2.2s, #37 ; CHECK-GI-NEXT: mov w8, #8 // =0x8 ; CHECK-GI-NEXT: and v1.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: mul v1.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: neg v2.4h, v2.4h ; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: neg v3.8b, v3.8b ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: sub v2.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: mov w9, v2.s[1] ; CHECK-GI-NEXT: mov v2.b[1], w9 ; CHECK-GI-NEXT: ushl v2.8b, v2.8b, v3.8b ; CHECK-GI-NEXT: umov w8, v2.b[0] ; CHECK-GI-NEXT: umov w9, v2.b[1] ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov w8, #2 // =0x2 ; CHECK-GI-NEXT: mov v2.s[1], w9 ; CHECK-GI-NEXT: add v1.2s, v2.2s, v1.2s ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov w9, v1.s[1] ; CHECK-GI-NEXT: mov v2.b[1], w8 ; CHECK-GI-NEXT: mov v1.b[1], w9 ; CHECK-GI-NEXT: neg v2.8b, v2.8b ; CHECK-GI-NEXT: ushl v1.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: movi v2.2s, #7 ; CHECK-GI-NEXT: umov w8, v1.b[0] ; CHECK-GI-NEXT: umov w9, v1.b[1] ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: mov v1.s[1], w9 ; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: ret entry: %s = urem <2 x i8> %d, ret <2 x i8> %s } define <2 x i8> @uv2i8_100(<2 x i8> %d, <2 x i8> %e) { ; CHECK-SD-LABEL: uv2i8_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff ; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29 ; CHECK-SD-NEXT: movk w8, #655, lsl #16 ; CHECK-SD-NEXT: dup v2.2s, w8 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: umull v1.2d, v0.2s, v2.2s ; CHECK-SD-NEXT: movi 
v2.2s, #100 ; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv2i8_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: movi d1, #0x0000ff000000ff ; CHECK-GI-NEXT: movi v2.2s, #41 ; CHECK-GI-NEXT: mov w8, #8 // =0x8 ; CHECK-GI-NEXT: and v1.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: mul v1.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: mov w8, #4 // =0x4 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: neg v2.4h, v2.4h ; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: neg v2.8b, v3.8b ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ushl v1.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: movi v2.2s, #100 ; CHECK-GI-NEXT: umov w8, v1.b[0] ; CHECK-GI-NEXT: umov w9, v1.b[1] ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: mov v1.s[1], w9 ; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: ret entry: %s = urem <2 x i8> %d, ret <2 x i8> %s } define <3 x i8> @uv3i8_7(<3 x i8> %d, <3 x i8> %e) { ; CHECK-SD-LABEL: uv3i8_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 ; CHECK-SD-NEXT: and w9, w0, #0xff ; CHECK-SD-NEXT: and w10, w1, #0xff ; CHECK-SD-NEXT: movk w8, #9362, lsl #16 ; CHECK-SD-NEXT: and w12, w2, #0xff ; CHECK-SD-NEXT: umull x11, w9, w8 ; CHECK-SD-NEXT: umull x13, w10, w8 ; CHECK-SD-NEXT: umull x8, w12, w8 ; CHECK-SD-NEXT: lsr x11, x11, #32 ; CHECK-SD-NEXT: lsr x13, x13, #32 ; CHECK-SD-NEXT: lsr x8, x8, #32 ; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3 ; CHECK-SD-NEXT: sub w13, w13, w13, lsl #3 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 ; CHECK-SD-NEXT: add w0, w9, w11 ; CHECK-SD-NEXT: add w1, w10, w13 ; CHECK-SD-NEXT: add w2, w12, w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv3i8_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: and w8, w0, #0xff ; CHECK-GI-NEXT: mov w10, #37 // =0x25 ; CHECK-GI-NEXT: and w9, w1, #0xff ; CHECK-GI-NEXT: fmov s1, w8 
; CHECK-GI-NEXT: mov w8, #8 // =0x8 ; CHECK-GI-NEXT: fmov s2, w10 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: fmov s0, w0 ; CHECK-GI-NEXT: mov v1.h[1], w9 ; CHECK-GI-NEXT: mov v2.h[1], w10 ; CHECK-GI-NEXT: and w9, w2, #0xff ; CHECK-GI-NEXT: mov v3.h[1], w8 ; CHECK-GI-NEXT: mov v0.h[1], w1 ; CHECK-GI-NEXT: mov v1.h[2], w9 ; CHECK-GI-NEXT: mov v2.h[2], w10 ; CHECK-GI-NEXT: mov v3.h[2], w8 ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: mul v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: neg v2.4h, v3.4h ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: sub v2.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: mov v3.b[2], w8 ; CHECK-GI-NEXT: uzp1 v2.8b, v2.8b, v0.8b ; CHECK-GI-NEXT: neg v3.8b, v3.8b ; CHECK-GI-NEXT: ushl v2.8b, v2.8b, v3.8b ; CHECK-GI-NEXT: mov b3, v2.b[1] ; CHECK-GI-NEXT: mov b4, v2.b[2] ; CHECK-GI-NEXT: fmov w8, s3 ; CHECK-GI-NEXT: fmov w9, s4 ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: mov w8, #2 // =0x2 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: mov v2.h[2], w9 ; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: add v1.4h, v2.4h, v1.4h ; CHECK-GI-NEXT: mov v3.b[2], w8 ; CHECK-GI-NEXT: mov w8, #7 // =0x7 ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: neg v2.8b, v3.8b ; CHECK-GI-NEXT: ushl v1.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: mov b2, v1.b[1] ; CHECK-GI-NEXT: mov b3, v1.b[2] ; CHECK-GI-NEXT: fmov w9, s2 ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov v1.h[1], w9 ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: fmov w9, s3 ; CHECK-GI-NEXT: mov v1.h[2], w9 ; CHECK-GI-NEXT: mov v2.h[2], w8 ; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: umov w0, v0.h[0] ; CHECK-GI-NEXT: umov w1, v0.h[1] ; CHECK-GI-NEXT: umov w2, v0.h[2] ; CHECK-GI-NEXT: ret entry: %s = urem <3 x i8> %d, ret <3 x i8> %s } define <3 x i8> @uv3i8_100(<3 x i8> %d, <3 x i8> %e) { ; CHECK-SD-LABEL: uv3i8_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #23593 // 
=0x5c29 ; CHECK-SD-NEXT: and w9, w0, #0xff ; CHECK-SD-NEXT: and w10, w1, #0xff ; CHECK-SD-NEXT: movk w8, #655, lsl #16 ; CHECK-SD-NEXT: and w12, w2, #0xff ; CHECK-SD-NEXT: mov w14, #100 // =0x64 ; CHECK-SD-NEXT: umull x11, w9, w8 ; CHECK-SD-NEXT: umull x13, w10, w8 ; CHECK-SD-NEXT: umull x8, w12, w8 ; CHECK-SD-NEXT: lsr x11, x11, #32 ; CHECK-SD-NEXT: lsr x13, x13, #32 ; CHECK-SD-NEXT: lsr x8, x8, #32 ; CHECK-SD-NEXT: msub w0, w11, w14, w9 ; CHECK-SD-NEXT: msub w1, w13, w14, w10 ; CHECK-SD-NEXT: msub w2, w8, w14, w12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv3i8_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: and w8, w0, #0xff ; CHECK-GI-NEXT: mov w10, #41 // =0x29 ; CHECK-GI-NEXT: and w9, w1, #0xff ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: mov w8, #8 // =0x8 ; CHECK-GI-NEXT: fmov s1, w10 ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov v0.h[1], w9 ; CHECK-GI-NEXT: mov v1.h[1], w10 ; CHECK-GI-NEXT: and w9, w2, #0xff ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: mov v0.h[2], w9 ; CHECK-GI-NEXT: mov v1.h[2], w10 ; CHECK-GI-NEXT: mov v2.h[2], w8 ; CHECK-GI-NEXT: mov w8, #4 // =0x4 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: neg v1.4h, v2.4h ; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: mov v3.b[2], w8 ; CHECK-GI-NEXT: mov w8, #100 // =0x64 ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: neg v1.8b, v3.8b ; CHECK-GI-NEXT: fmov s3, w0 ; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: mov v3.h[1], w1 ; CHECK-GI-NEXT: mov b1, v0.b[1] ; CHECK-GI-NEXT: mov b2, v0.b[2] ; CHECK-GI-NEXT: mov v3.h[2], w2 ; CHECK-GI-NEXT: fmov w9, s1 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: mov v0.h[1], w9 ; CHECK-GI-NEXT: mov v1.h[1], w8 ; CHECK-GI-NEXT: fmov w9, s2 ; CHECK-GI-NEXT: mov v0.h[2], w9 ; CHECK-GI-NEXT: mov v1.h[2], w8 ; CHECK-GI-NEXT: mls v3.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: umov w0, v3.h[0] ; CHECK-GI-NEXT: umov w1, v3.h[1] ; CHECK-GI-NEXT: umov w2, 
v3.h[2] ; CHECK-GI-NEXT: ret entry: %s = urem <3 x i8> %d, ret <3 x i8> %s } define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) { ; CHECK-SD-LABEL: uv4i8_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 ; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 ; CHECK-SD-NEXT: movk w8, #9362, lsl #16 ; CHECK-SD-NEXT: umov w9, v0.h[0] ; CHECK-SD-NEXT: umov w10, v0.h[1] ; CHECK-SD-NEXT: umov w13, v0.h[2] ; CHECK-SD-NEXT: umov w15, v0.h[3] ; CHECK-SD-NEXT: umull x11, w9, w8 ; CHECK-SD-NEXT: umull x12, w10, w8 ; CHECK-SD-NEXT: umull x14, w13, w8 ; CHECK-SD-NEXT: lsr x11, x11, #32 ; CHECK-SD-NEXT: umull x8, w15, w8 ; CHECK-SD-NEXT: lsr x12, x12, #32 ; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3 ; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3 ; CHECK-SD-NEXT: lsr x8, x8, #32 ; CHECK-SD-NEXT: add w9, w9, w11 ; CHECK-SD-NEXT: fmov s0, w9 ; CHECK-SD-NEXT: add w10, w10, w12 ; CHECK-SD-NEXT: lsr x9, x14, #32 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 ; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3 ; CHECK-SD-NEXT: mov v0.h[1], w10 ; CHECK-SD-NEXT: add w8, w15, w8 ; CHECK-SD-NEXT: add w9, w13, w9 ; CHECK-SD-NEXT: mov v0.h[2], w9 ; CHECK-SD-NEXT: mov v0.h[3], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv4i8_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #37 // =0x25 ; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: mov v1.b[1], w8 ; CHECK-GI-NEXT: and v2.8b, v0.8b, v2.8b ; CHECK-GI-NEXT: mov v1.b[2], w8 ; CHECK-GI-NEXT: mov v1.b[3], w8 ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: mul v1.4h, v2.4h, v1.4h ; CHECK-GI-NEXT: mov v3.b[2], w8 ; CHECK-GI-NEXT: ushr v2.4h, v1.4h, #8 ; CHECK-GI-NEXT: mov v3.b[3], w8 ; CHECK-GI-NEXT: mov w8, #2 // =0x2 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: sub v2.4h, v0.4h, v2.4h ; 
CHECK-GI-NEXT: neg v3.8b, v3.8b ; CHECK-GI-NEXT: mov v4.b[1], w8 ; CHECK-GI-NEXT: uzp1 v2.8b, v2.8b, v0.8b ; CHECK-GI-NEXT: mov v4.b[2], w8 ; CHECK-GI-NEXT: ushl v2.8b, v2.8b, v3.8b ; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0 ; CHECK-GI-NEXT: mov v4.b[3], w8 ; CHECK-GI-NEXT: mov w8, #7 // =0x7 ; CHECK-GI-NEXT: usra v2.4h, v1.4h, #8 ; CHECK-GI-NEXT: uzp1 v1.8b, v2.8b, v0.8b ; CHECK-GI-NEXT: neg v2.8b, v4.8b ; CHECK-GI-NEXT: ushl v1.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: dup v2.4h, w8 ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: ret entry: %s = urem <4 x i8> %d, ret <4 x i8> %s } define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) { ; CHECK-SD-LABEL: uv4i8_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29 ; CHECK-SD-NEXT: mov w14, #100 // =0x64 ; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 ; CHECK-SD-NEXT: movk w8, #655, lsl #16 ; CHECK-SD-NEXT: umov w9, v0.h[0] ; CHECK-SD-NEXT: umov w10, v0.h[1] ; CHECK-SD-NEXT: umov w12, v0.h[2] ; CHECK-SD-NEXT: umov w15, v0.h[3] ; CHECK-SD-NEXT: umull x11, w9, w8 ; CHECK-SD-NEXT: umull x13, w10, w8 ; CHECK-SD-NEXT: lsr x11, x11, #32 ; CHECK-SD-NEXT: lsr x13, x13, #32 ; CHECK-SD-NEXT: msub w9, w11, w14, w9 ; CHECK-SD-NEXT: umull x11, w12, w8 ; CHECK-SD-NEXT: msub w10, w13, w14, w10 ; CHECK-SD-NEXT: fmov s0, w9 ; CHECK-SD-NEXT: umull x8, w15, w8 ; CHECK-SD-NEXT: lsr x9, x11, #32 ; CHECK-SD-NEXT: mov v0.h[1], w10 ; CHECK-SD-NEXT: msub w9, w9, w14, w12 ; CHECK-SD-NEXT: lsr x8, x8, #32 ; CHECK-SD-NEXT: msub w8, w8, w14, w15 ; CHECK-SD-NEXT: mov v0.h[2], w9 ; CHECK-SD-NEXT: mov v0.h[3], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv4i8_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #41 // =0x29 ; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: mov v1.b[1], w8 ; CHECK-GI-NEXT: and v2.8b, v0.8b, v2.8b ; 
CHECK-GI-NEXT: mov v1.b[2], w8 ; CHECK-GI-NEXT: mov v1.b[3], w8 ; CHECK-GI-NEXT: mov w8, #4 // =0x4 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-GI-NEXT: mul v1.4h, v2.4h, v1.4h ; CHECK-GI-NEXT: mov v3.b[2], w8 ; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #8 ; CHECK-GI-NEXT: mov v3.b[3], w8 ; CHECK-GI-NEXT: mov w8, #100 // =0x64 ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: neg v2.8b, v3.8b ; CHECK-GI-NEXT: ushl v1.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: dup v2.4h, w8 ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: ret entry: %s = urem <4 x i8> %d, ret <4 x i8> %s } define <8 x i8> @uv8i8_7(<8 x i8> %d, <8 x i8> %e) { ; CHECK-SD-LABEL: uv8i8_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: movi v1.8b, #37 ; CHECK-SD-NEXT: umull v1.8h, v0.8b, v1.8b ; CHECK-SD-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-SD-NEXT: sub v2.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: ushll v2.8h, v2.8b, #0 ; CHECK-SD-NEXT: shrn v2.8b, v2.8h, #1 ; CHECK-SD-NEXT: add v1.8b, v2.8b, v1.8b ; CHECK-SD-NEXT: movi v2.8b, #7 ; CHECK-SD-NEXT: ushr v1.8b, v1.8b, #2 ; CHECK-SD-NEXT: mls v0.8b, v1.8b, v2.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv8i8_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: movi v1.8b, #37 ; CHECK-GI-NEXT: umull v1.8h, v0.8b, v1.8b ; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-GI-NEXT: sub v2.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: usra v1.8b, v2.8b, #1 ; CHECK-GI-NEXT: movi v2.8b, #7 ; CHECK-GI-NEXT: ushr v1.8b, v1.8b, #2 ; CHECK-GI-NEXT: mls v0.8b, v1.8b, v2.8b ; CHECK-GI-NEXT: ret entry: %s = urem <8 x i8> %d, ret <8 x i8> %s } define <8 x i8> @uv8i8_100(<8 x i8> %d, <8 x i8> %e) { ; CHECK-LABEL: uv8i8_100: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.8b, #41 ; CHECK-NEXT: movi v2.8b, #100 ; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b ; CHECK-NEXT: shrn v1.8b, v1.8h, #8 ; CHECK-NEXT: ushr v1.8b, v1.8b, #4 ; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret entry: %s = urem 
<8 x i8> %d, ret <8 x i8> %s } define <16 x i8> @uv16i8_7(<16 x i8> %d, <16 x i8> %e) { ; CHECK-LABEL: uv16i8_7: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.16b, #37 ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b ; CHECK-NEXT: uzp2 v1.16b, v1.16b, v2.16b ; CHECK-NEXT: sub v2.16b, v0.16b, v1.16b ; CHECK-NEXT: usra v1.16b, v2.16b, #1 ; CHECK-NEXT: movi v2.16b, #7 ; CHECK-NEXT: ushr v1.16b, v1.16b, #2 ; CHECK-NEXT: mls v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret entry: %s = urem <16 x i8> %d, ret <16 x i8> %s } define <16 x i8> @uv16i8_100(<16 x i8> %d, <16 x i8> %e) { ; CHECK-LABEL: uv16i8_100: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.16b, #41 ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b ; CHECK-NEXT: uzp2 v1.16b, v1.16b, v2.16b ; CHECK-NEXT: movi v2.16b, #100 ; CHECK-NEXT: ushr v1.16b, v1.16b, #4 ; CHECK-NEXT: mls v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret entry: %s = urem <16 x i8> %d, ret <16 x i8> %s } define <2 x i16> @sv2i16_7(<2 x i16> %d, <2 x i16> %e) { ; CHECK-SD-LABEL: sv2i16_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v1.2s, v0.2s, #16 ; CHECK-SD-NEXT: mov w8, #9363 // =0x2493 ; CHECK-SD-NEXT: movi v3.2s, #7 ; CHECK-SD-NEXT: movk w8, #37449, lsl #16 ; CHECK-SD-NEXT: dup v2.2s, w8 ; CHECK-SD-NEXT: sshr v0.2s, v1.2s, #16 ; CHECK-SD-NEXT: smull v2.2d, v0.2s, v2.2s ; CHECK-SD-NEXT: shrn v2.2s, v2.2d, #32 ; CHECK-SD-NEXT: ssra v2.2s, v1.2s, #16 ; CHECK-SD-NEXT: sshr v1.2s, v2.2s, #2 ; CHECK-SD-NEXT: usra v1.2s, v2.2s, #31 ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv2i16_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #18725 // =0x4925 ; CHECK-GI-NEXT: shl v2.2s, v0.2s, #16 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: sshr v2.2s, v2.2s, #16 ; CHECK-GI-NEXT: mov v1.h[1], w8 ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mul v1.2s, v2.2s, v1.2s ; CHECK-GI-NEXT: fmov s2, 
w8 ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: mov w8, #15 // =0xf ; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #16 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: mov v3.h[1], w8 ; CHECK-GI-NEXT: neg v2.4h, v2.4h ; CHECK-GI-NEXT: mov w8, #7 // =0x7 ; CHECK-GI-NEXT: sshl v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: neg v2.4h, v3.4h ; CHECK-GI-NEXT: dup v3.2s, w8 ; CHECK-GI-NEXT: ushl v2.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-GI-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-GI-NEXT: ret entry: %s = srem <2 x i16> %d, ret <2 x i16> %s } define <2 x i16> @sv2i16_100(<2 x i16> %d, <2 x i16> %e) { ; CHECK-SD-LABEL: sv2i16_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: movi v2.2s, #100 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37 ; CHECK-SD-NEXT: xtn v1.2s, v1.2d ; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31 ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv2i16_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #5243 // =0x147b ; CHECK-GI-NEXT: shl v2.2s, v0.2s, #16 ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: sshr v2.2s, v2.2s, #16 ; CHECK-GI-NEXT: mov v1.h[1], w8 ; CHECK-GI-NEXT: mov w8, #3 // =0x3 ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mul v1.2s, v2.2s, v1.2s ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: mov w8, #15 // =0xf ; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #16 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: mov v3.h[1], w8 ; CHECK-GI-NEXT: neg v2.4h, v2.4h ; CHECK-GI-NEXT: mov w8, #100 // =0x64 ; CHECK-GI-NEXT: sshl v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: neg v2.4h, v3.4h ; 
CHECK-GI-NEXT: dup v3.2s, w8 ; CHECK-GI-NEXT: ushl v2.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-GI-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-GI-NEXT: ret entry: %s = srem <2 x i16> %d, ret <2 x i16> %s } define <3 x i16> @sv3i16_7(<3 x i16> %d, <3 x i16> %e) { ; CHECK-SD-LABEL: sv3i16_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: smov x9, v0.h[0] ; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493 ; CHECK-SD-NEXT: smov x10, v0.h[1] ; CHECK-SD-NEXT: movk x8, #37449, lsl #16 ; CHECK-SD-NEXT: smov w12, v0.h[0] ; CHECK-SD-NEXT: smov x11, v0.h[2] ; CHECK-SD-NEXT: smov w13, v0.h[1] ; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x10, w10, w8 ; CHECK-SD-NEXT: smull x8, w11, w8 ; CHECK-SD-NEXT: smov w11, v0.h[2] ; CHECK-SD-NEXT: lsr x9, x9, #32 ; CHECK-SD-NEXT: lsr x10, x10, #32 ; CHECK-SD-NEXT: add w9, w9, w12 ; CHECK-SD-NEXT: lsr x8, x8, #32 ; CHECK-SD-NEXT: asr w14, w9, #2 ; CHECK-SD-NEXT: add w10, w10, w13 ; CHECK-SD-NEXT: asr w15, w10, #2 ; CHECK-SD-NEXT: add w8, w8, w11 ; CHECK-SD-NEXT: add w9, w14, w9, lsr #31 ; CHECK-SD-NEXT: asr w14, w8, #2 ; CHECK-SD-NEXT: add w10, w15, w10, lsr #31 ; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3 ; CHECK-SD-NEXT: add w8, w14, w8, lsr #31 ; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3 ; CHECK-SD-NEXT: add w9, w12, w9 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 ; CHECK-SD-NEXT: fmov s0, w9 ; CHECK-SD-NEXT: add w10, w13, w10 ; CHECK-SD-NEXT: add w8, w11, w8 ; CHECK-SD-NEXT: mov v0.h[1], w10 ; CHECK-SD-NEXT: mov v0.h[2], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv3i16_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: smov w9, v0.h[0] ; CHECK-GI-NEXT: mov w8, #7 // =0x7 ; CHECK-GI-NEXT: smov w11, v0.h[1] ; CHECK-GI-NEXT: smov w13, v0.h[2] ; CHECK-GI-NEXT: sdiv w10, w9, 
w8 ; CHECK-GI-NEXT: sdiv w12, w11, w8 ; CHECK-GI-NEXT: lsl w14, w10, #3 ; CHECK-GI-NEXT: sub w10, w14, w10 ; CHECK-GI-NEXT: sub w9, w9, w10 ; CHECK-GI-NEXT: fmov s0, w9 ; CHECK-GI-NEXT: sdiv w8, w13, w8 ; CHECK-GI-NEXT: lsl w15, w12, #3 ; CHECK-GI-NEXT: sub w10, w15, w12 ; CHECK-GI-NEXT: sub w10, w11, w10 ; CHECK-GI-NEXT: mov v0.h[1], w10 ; CHECK-GI-NEXT: lsl w9, w8, #3 ; CHECK-GI-NEXT: sub w8, w9, w8 ; CHECK-GI-NEXT: sub w8, w13, w8 ; CHECK-GI-NEXT: mov v0.h[2], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %s = srem <3 x i16> %d, ret <3 x i16> %s } define <3 x i16> @sv3i16_100(<3 x i16> %d, <3 x i16> %e) { ; CHECK-SD-LABEL: sv3i16_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: smov x9, v0.h[0] ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: smov x10, v0.h[1] ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smov x11, v0.h[2] ; CHECK-SD-NEXT: mov w12, #100 // =0x64 ; CHECK-SD-NEXT: smov w13, v0.h[1] ; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x10, w10, w8 ; CHECK-SD-NEXT: smull x8, w11, w8 ; CHECK-SD-NEXT: smov w11, v0.h[0] ; CHECK-SD-NEXT: asr x9, x9, #37 ; CHECK-SD-NEXT: asr x10, x10, #37 ; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 ; CHECK-SD-NEXT: asr x8, x8, #37 ; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 ; CHECK-SD-NEXT: msub w9, w9, w12, w11 ; CHECK-SD-NEXT: smov w11, v0.h[2] ; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 ; CHECK-SD-NEXT: msub w10, w10, w12, w13 ; CHECK-SD-NEXT: msub w8, w8, w12, w11 ; CHECK-SD-NEXT: fmov s0, w9 ; CHECK-SD-NEXT: mov v0.h[1], w10 ; CHECK-SD-NEXT: mov v0.h[2], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv3i16_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: smov w9, v0.h[0] ; CHECK-GI-NEXT: mov w8, #100 // =0x64 ; CHECK-GI-NEXT: smov w11, v0.h[1] ; CHECK-GI-NEXT: smov w13, v0.h[2] ; 
CHECK-GI-NEXT: sdiv w10, w9, w8 ; CHECK-GI-NEXT: sdiv w12, w11, w8 ; CHECK-GI-NEXT: msub w9, w10, w8, w9 ; CHECK-GI-NEXT: fmov s0, w9 ; CHECK-GI-NEXT: sdiv w14, w13, w8 ; CHECK-GI-NEXT: msub w10, w12, w8, w11 ; CHECK-GI-NEXT: mov v0.h[1], w10 ; CHECK-GI-NEXT: msub w8, w14, w8, w13 ; CHECK-GI-NEXT: mov v0.h[2], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %s = srem <3 x i16> %d, ret <3 x i16> %s } define <4 x i16> @sv4i16_7(<4 x i16> %d, <4 x i16> %e) { ; CHECK-SD-LABEL: sv4i16_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 ; CHECK-SD-NEXT: movi v2.4h, #7 ; CHECK-SD-NEXT: dup v1.4h, w8 ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #17 ; CHECK-SD-NEXT: xtn v1.4h, v1.4s ; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15 ; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv4i16_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI44_0 ; CHECK-GI-NEXT: movi v3.4h, #7 ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI44_0] ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: sshr v2.4h, v1.4h, #1 ; CHECK-GI-NEXT: ushr v2.4h, v2.4h, #15 ; CHECK-GI-NEXT: ssra v2.4h, v1.4h, #1 ; CHECK-GI-NEXT: mls v0.4h, v2.4h, v3.4h ; CHECK-GI-NEXT: ret entry: %s = srem <4 x i16> %d, ret <4 x i16> %s } define <4 x i16> @sv4i16_100(<4 x i16> %d, <4 x i16> %e) { ; CHECK-SD-LABEL: sv4i16_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #5243 // =0x147b ; CHECK-SD-NEXT: movi v2.4h, #100 ; CHECK-SD-NEXT: dup v1.4h, w8 ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #19 ; CHECK-SD-NEXT: xtn v1.4h, v1.4s ; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15 ; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv4i16_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI45_0 ; CHECK-GI-NEXT: movi v3.4h, #100 ; CHECK-GI-NEXT: ldr d1, [x8, 
:lo12:.LCPI45_0] ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-GI-NEXT: sshr v2.4h, v1.4h, #3 ; CHECK-GI-NEXT: ushr v2.4h, v2.4h, #15 ; CHECK-GI-NEXT: ssra v2.4h, v1.4h, #3 ; CHECK-GI-NEXT: mls v0.4h, v2.4h, v3.4h ; CHECK-GI-NEXT: ret entry: %s = srem <4 x i16> %d, ret <4 x i16> %s } define <8 x i16> @sv8i16_7(<8 x i16> %d, <8 x i16> %e) { ; CHECK-SD-LABEL: sv8i16_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 ; CHECK-SD-NEXT: dup v1.8h, w8 ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: movi v2.8h, #7 ; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #1 ; CHECK-SD-NEXT: usra v1.8h, v1.8h, #15 ; CHECK-SD-NEXT: mls v0.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv8i16_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI46_0 ; CHECK-GI-NEXT: movi v3.8h, #7 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI46_0] ; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-GI-NEXT: sshr v2.8h, v1.8h, #1 ; CHECK-GI-NEXT: ushr v2.8h, v2.8h, #15 ; CHECK-GI-NEXT: ssra v2.8h, v1.8h, #1 ; CHECK-GI-NEXT: mls v0.8h, v2.8h, v3.8h ; CHECK-GI-NEXT: ret entry: %s = srem <8 x i16> %d, ret <8 x i16> %s } define <8 x i16> @sv8i16_100(<8 x i16> %d, <8 x i16> %e) { ; CHECK-SD-LABEL: sv8i16_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #5243 // =0x147b ; CHECK-SD-NEXT: dup v1.8h, w8 ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: movi v2.8h, #100 ; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #3 ; CHECK-SD-NEXT: usra v1.8h, v1.8h, #15 ; CHECK-SD-NEXT: mls v0.8h, v1.8h, v2.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv8i16_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI47_0 ; CHECK-GI-NEXT: movi v3.8h, #100 ; 
CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI47_0] ; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-GI-NEXT: sshr v2.8h, v1.8h, #3 ; CHECK-GI-NEXT: ushr v2.8h, v2.8h, #15 ; CHECK-GI-NEXT: ssra v2.8h, v1.8h, #3 ; CHECK-GI-NEXT: mls v0.8h, v2.8h, v3.8h ; CHECK-GI-NEXT: ret entry: %s = srem <8 x i16> %d, ret <8 x i16> %s } define <2 x i16> @uv2i16_7(<2 x i16> %d, <2 x i16> %e) { ; CHECK-SD-LABEL: uv2i16_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 ; CHECK-SD-NEXT: movk w8, #9362, lsl #16 ; CHECK-SD-NEXT: dup v2.2s, w8 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: umull v1.2d, v0.2s, v2.2s ; CHECK-SD-NEXT: movi v2.2s, #7 ; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv2i16_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #9363 // =0x2493 ; CHECK-GI-NEXT: movi d2, #0x00ffff0000ffff ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: mov v1.h[1], w8 ; CHECK-GI-NEXT: and v2.8b, v0.8b, v2.8b ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mov v3.h[1], w8 ; CHECK-GI-NEXT: mov w8, #2 // =0x2 ; CHECK-GI-NEXT: mul v1.2s, v2.2s, v1.2s ; CHECK-GI-NEXT: neg v3.4h, v3.4h ; CHECK-GI-NEXT: ushr v2.2s, v1.2s, #16 ; CHECK-GI-NEXT: sub v2.2s, v0.2s, v2.2s ; CHECK-GI-NEXT: uzp1 v2.4h, v2.4h, v0.4h ; CHECK-GI-NEXT: ushl v2.4h, v2.4h, v3.4h ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-GI-NEXT: mov v3.h[1], w8 ; CHECK-GI-NEXT: mov w8, #7 // =0x7 ; CHECK-GI-NEXT: usra v2.2s, v1.2s, #16 ; CHECK-GI-NEXT: uzp1 v1.4h, v2.4h, v0.4h ; CHECK-GI-NEXT: neg v2.4h, v3.4h ; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: dup v2.2s, w8 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: ret entry: 
%s = urem <2 x i16> %d, ret <2 x i16> %s } define <2 x i16> @uv2i16_100(<2 x i16> %d, <2 x i16> %e) { ; CHECK-SD-LABEL: uv2i16_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29 ; CHECK-SD-NEXT: movk w8, #655, lsl #16 ; CHECK-SD-NEXT: dup v2.2s, w8 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: umull v1.2d, v0.2s, v2.2s ; CHECK-SD-NEXT: movi v2.2s, #100 ; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv2i16_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #2 // =0x2 ; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: fmov s1, w8 ; CHECK-GI-NEXT: mov v1.h[1], w8 ; CHECK-GI-NEXT: mov w8, #5243 // =0x147b ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: neg v1.4h, v1.4h ; CHECK-GI-NEXT: mov v3.h[1], w8 ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: ushl v1.4h, v2.4h, v1.4h ; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mul v1.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: fmov s2, w8 ; CHECK-GI-NEXT: mov v2.h[1], w8 ; CHECK-GI-NEXT: mov w8, #100 // =0x64 ; CHECK-GI-NEXT: ushr v1.2s, v1.2s, #16 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: neg v2.4h, v2.4h ; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h ; CHECK-GI-NEXT: dup v2.2s, w8 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-GI-NEXT: ret entry: %s = urem <2 x i16> %d, ret <2 x i16> %s } define <3 x i16> @uv3i16_7(<3 x i16> %d, <3 x i16> %e) { ; CHECK-SD-LABEL: uv3i16_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: umov w9, v0.h[0] ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 ; CHECK-SD-NEXT: umov w10, v0.h[1] ; CHECK-SD-NEXT: movk w8, #9362, lsl #16 ; CHECK-SD-NEXT: umov w12, v0.h[2] ; CHECK-SD-NEXT: umull x11, w9, w8 ; CHECK-SD-NEXT: umull x13, w10, w8 ; CHECK-SD-NEXT: umull x8, w12, w8 ; 
; CHECK-SD-NEXT: lsr x11, x11, #32
; CHECK-SD-NEXT: lsr x13, x13, #32
; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
; CHECK-SD-NEXT: lsr x8, x8, #32
; CHECK-SD-NEXT: sub w13, w13, w13, lsl #3
; CHECK-SD-NEXT: add w9, w9, w11
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: add w10, w10, w13
; CHECK-SD-NEXT: add w8, w12, w8
; CHECK-SD-NEXT: mov v0.h[1], w10
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i16_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: umov w9, v0.h[0]
; CHECK-GI-NEXT: mov w8, #9363 // =0x2493
; CHECK-GI-NEXT: umov w10, v0.h[1]
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: umov w11, v0.h[2]
; CHECK-GI-NEXT: fmov s1, w9
; CHECK-GI-NEXT: mov w9, #16 // =0x10
; CHECK-GI-NEXT: mov v2.s[1], w8
; CHECK-GI-NEXT: fmov s3, w9
; CHECK-GI-NEXT: mov v1.s[1], w10
; CHECK-GI-NEXT: mov v3.s[1], w9
; CHECK-GI-NEXT: mov v2.s[2], w8
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov v1.s[2], w11
; CHECK-GI-NEXT: mov v3.s[2], w9
; CHECK-GI-NEXT: mov w9, #2 // =0x2
; CHECK-GI-NEXT: mul v1.4s, v1.4s, v2.4s
; CHECK-GI-NEXT: neg v2.4s, v3.4s
; CHECK-GI-NEXT: fmov s3, w8
; CHECK-GI-NEXT: mov v3.h[1], w8
; CHECK-GI-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-GI-NEXT: fmov s2, w9
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
; CHECK-GI-NEXT: mov v2.h[1], w9
; CHECK-GI-NEXT: mov v3.h[2], w8
; CHECK-GI-NEXT: mov w8, #7 // =0x7
; CHECK-GI-NEXT: sub v4.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: mov v2.h[2], w9
; CHECK-GI-NEXT: neg v3.4h, v3.4h
; CHECK-GI-NEXT: ushl v3.4h, v4.4h, v3.4h
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: neg v2.4h, v2.4h
; CHECK-GI-NEXT: mov v4.h[1], w8
; CHECK-GI-NEXT: add v1.4h, v3.4h, v1.4h
; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: mov v4.h[2], w8
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v4.4h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
  %s = urem <3 x i16> %d, <i16 7, i16 7, i16 7>
  ret <3 x i16> %s
}

; urem <3 x i16> by splat(100): the SelectionDAG output works lane by lane
; with umull/msub; no udiv is expected.
define <3 x i16> @uv3i16_100(<3 x i16> %d, <3 x i16> %e) {
; CHECK-SD-LABEL: uv3i16_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w9, v0.h[0]
; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
; CHECK-SD-NEXT: umov w10, v0.h[1]
; CHECK-SD-NEXT: movk w8, #655, lsl #16
; CHECK-SD-NEXT: umov w12, v0.h[2]
; CHECK-SD-NEXT: mov w14, #100 // =0x64
; CHECK-SD-NEXT: umull x11, w9, w8
; CHECK-SD-NEXT: umull x13, w10, w8
; CHECK-SD-NEXT: umull x8, w12, w8
; CHECK-SD-NEXT: lsr x11, x11, #32
; CHECK-SD-NEXT: msub w9, w11, w14, w9
; CHECK-SD-NEXT: lsr x11, x13, #32
; CHECK-SD-NEXT: lsr x8, x8, #32
; CHECK-SD-NEXT: msub w10, w11, w14, w10
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: msub w8, w8, w14, w12
; CHECK-SD-NEXT: mov v0.h[1], w10
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i16_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #2 // =0x2
; CHECK-GI-NEXT: mov w11, #5243 // =0x147b
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: fmov s2, w11
; CHECK-GI-NEXT: mov v1.h[1], w8
; CHECK-GI-NEXT: mov v2.s[1], w11
; CHECK-GI-NEXT: mov v1.h[2], w8
; CHECK-GI-NEXT: mov v2.s[2], w11
; CHECK-GI-NEXT: neg v1.4h, v1.4h
; CHECK-GI-NEXT: ushl v1.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: umov w8, v1.h[0]
; CHECK-GI-NEXT: umov w9, v1.h[1]
; CHECK-GI-NEXT: umov w10, v1.h[2]
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov w8, #16 // =0x10
; CHECK-GI-NEXT: fmov s3, w8
; CHECK-GI-NEXT: mov v1.s[1], w9
; CHECK-GI-NEXT: mov w9, #100 // =0x64
; CHECK-GI-NEXT: mov v3.s[1], w8
; CHECK-GI-NEXT: mov v1.s[2], w10
; CHECK-GI-NEXT: mov v3.s[2], w8
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: mul v1.4s, v1.4s, v2.4s
; CHECK-GI-NEXT: mov v4.h[1], w8
; CHECK-GI-NEXT: neg v2.4s, v3.4s
; CHECK-GI-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-GI-NEXT: fmov s2, w9
; CHECK-GI-NEXT: mov v4.h[2], w8
; CHECK-GI-NEXT: mov v2.h[1], w9
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
; CHECK-GI-NEXT: neg v3.4h, v4.4h
; CHECK-GI-NEXT: mov v2.h[2], w9
; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v3.4h
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ret
entry:
  %s = urem <3 x i16> %d, <i16 100, i16 100, i16 100>
  ret <3 x i16> %s
}

; urem <4 x i16> by splat(7): umull by a magic constant, shift fix-up, then
; mls with #7 in both selectors.
define <4 x i16> @uv4i16_7(<4 x i16> %d, <4 x i16> %e) {
; CHECK-SD-LABEL: uv4i16_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: dup v1.4h, w8
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-SD-NEXT: sub v2.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-SD-NEXT: shrn v2.4h, v2.4s, #1
; CHECK-SD-NEXT: add v1.4h, v2.4h, v1.4h
; CHECK-SD-NEXT: movi v2.4h, #7
; CHECK-SD-NEXT: ushr v1.4h, v1.4h, #2
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i16_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI52_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI52_0]
; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: sub v2.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: usra v1.4h, v2.4h, #1
; CHECK-GI-NEXT: movi v2.4h, #7
; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #2
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ret
entry:
  %s = urem <4 x i16> %d, <i16 7, i16 7, i16 7, i16 7>
  ret <4 x i16> %s
}

; urem <4 x i16> by splat(100): pre-shift by 2, umull by a magic constant,
; then mls with #100 in both selectors.
define <4 x i16> @uv4i16_100(<4 x i16> %d, <4 x i16> %e) {
; CHECK-SD-LABEL: uv4i16_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #5243 // =0x147b
; CHECK-SD-NEXT: ushr v2.4h, v0.4h, #2
; CHECK-SD-NEXT: dup v1.4h, w8
; CHECK-SD-NEXT: umull v1.4s, v2.4h, v1.4h
; CHECK-SD-NEXT: movi v2.4h, #100
; CHECK-SD-NEXT: ushr v1.4s, v1.4s, #17
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i16_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI53_0
; CHECK-GI-NEXT: ushr v1.4h, v0.4h, #2
; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI53_0]
; CHECK-GI-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-GI-NEXT: movi v2.4h, #100
; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #1
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ret
entry:
  %s = urem <4 x i16> %d, <i16 100, i16 100, i16 100, i16 100>
  ret <4 x i16> %s
}

; urem <8 x i16> by splat(7): umull/umull2 + uzp2 form the high halves, then
; the usra/ushr fix-up and mls with #7 in both selectors.
define <8 x i16> @uv8i16_7(<8 x i16> %d, <8 x i16> %e) {
; CHECK-SD-LABEL: uv8i16_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: dup v1.8h, w8
; CHECK-SD-NEXT: umull2 v2.4s, v0.8h, v1.8h
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT: sub v2.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: usra v1.8h, v2.8h, #1
; CHECK-SD-NEXT: movi v2.8h, #7
; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #2
; CHECK-SD-NEXT: mls v0.8h, v1.8h, v2.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv8i16_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI54_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI54_0]
; CHECK-GI-NEXT: umull2 v2.4s, v0.8h, v1.8h
; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: sub v2.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: usra v1.8h, v2.8h, #1
; CHECK-GI-NEXT: movi v2.8h, #7
; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #2
; CHECK-GI-NEXT: mls v0.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: ret
entry:
  %s = urem <8 x i16> %d, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %s
}

; urem <8 x i16> by splat(100): pre-shift by 2, umull/umull2 + uzp2, then mls
; with #100.
define <8 x i16> @uv8i16_100(<8 x i16> %d, <8 x i16> %e) {
; CHECK-SD-LABEL: uv8i16_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #5243 // =0x147b
; CHECK-SD-NEXT: ushr v2.8h, v0.8h, #2
; CHECK-SD-NEXT: dup v1.8h, w8
; CHECK-SD-NEXT: umull2 v3.4s, v2.8h, v1.8h
; CHECK-SD-NEXT: umull v1.4s, v2.4h, v1.4h
; CHECK-SD-NEXT: movi v2.8h, #100
; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1
; CHECK-SD-NEXT: mls v0.8h, v1.8h, v2.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv8i16_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI55_0
; CHECK-GI-NEXT: ushr v1.8h, v0.8h, #2
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI55_0]
; CHECK-GI-NEXT: umull2 v3.4s, v1.8h, v2.8h
; CHECK-GI-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-GI-NEXT: movi v2.8h, #100
; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #1
; CHECK-GI-NEXT: mls v0.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: ret
entry:
  %s = urem <8 x i16> %d, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
  ret <8 x i16> %s
}

; srem <2 x i32> by splat(7): smull by a magic constant, add-back of the
; dividend, shift plus sign-bit correction, then mls with #7. No sdiv is
; expected in either selector.
define <2 x i32> @sv2i32_7(<2 x i32> %d, <2 x i32> %e) {
; CHECK-SD-LABEL: sv2i32_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: movi v3.2s, #7
; CHECK-SD-NEXT: movk w8, #37449, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-SD-NEXT: add v1.2s, v1.2s, v0.2s
; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2
; CHECK-SD-NEXT: usra v2.2s, v1.2s, #31
; CHECK-SD-NEXT: mls v0.2s, v2.2s, v3.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i32_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI56_0
; CHECK-GI-NEXT: movi v3.2s, #7
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI56_0]
; CHECK-GI-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: add v1.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: sshr v2.2s, v1.2s, #2
; CHECK-GI-NEXT: ushr v2.2s, v2.2s, #31
; CHECK-GI-NEXT: ssra v2.2s, v1.2s, #2
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v3.2s
; CHECK-GI-NEXT: ret
entry:
  %s = srem <2 x i32> %d, <i32 7, i32 7>
  ret <2 x i32> %s
}

; srem <2 x i32> by splat(100): smull by a magic constant, shift plus sign-bit
; correction, then mls with #100.
define <2 x i32> @sv2i32_100(<2 x i32> %d, <2 x i32> %e) {
; CHECK-SD-LABEL: sv2i32_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: movi v2.2s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31
; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i32_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI57_0
; CHECK-GI-NEXT: movi v3.2s, #100
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI57_0]
; CHECK-GI-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: sshr v2.2s, v1.2s, #5
; CHECK-GI-NEXT: ushr v2.2s, v2.2s, #31
; CHECK-GI-NEXT: ssra v2.2s, v1.2s, #5
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v3.2s
; CHECK-GI-NEXT: ret
entry:
  %s = srem <2 x i32> %d, <i32 100, i32 100>
  ret <2 x i32> %s
}

; srem <3 x i32> by splat(7): SelectionDAG uses the multiply/shift expansion
; (two lanes in vector form, the third in scalar form); GlobalISel currently
; scalarizes every lane to sdiv.
define <3 x i32> @sv3i32_7(<3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-LABEL: sv3i32_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: mov w9, v0.s[2]
; CHECK-SD-NEXT: movi v3.2s, #7
; CHECK-SD-NEXT: movk w8, #37449, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: smull x8, w9, w8
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: lsr x8, x8, #32
; CHECK-SD-NEXT: add w8, w8, w9
; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-SD-NEXT: asr w10, w8, #2
; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
; CHECK-SD-NEXT: add v1.2s, v1.2s, v0.2s
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2
; CHECK-SD-NEXT: add w8, w9, w8
; CHECK-SD-NEXT: usra v2.2s, v1.2s, #31
; CHECK-SD-NEXT: mls v0.2s, v2.2s, v3.2s
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i32_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w9, s0
; CHECK-GI-NEXT: mov w8, #7 // =0x7
; CHECK-GI-NEXT: mov s0, v0.s[2]
; CHECK-GI-NEXT: sdiv w10, w9, w8
; CHECK-GI-NEXT: fmov w11, s1
; CHECK-GI-NEXT: fmov w13, s0
; CHECK-GI-NEXT: sdiv w12, w11, w8
; CHECK-GI-NEXT: lsl w14, w10, #3
; CHECK-GI-NEXT: sub w10, w14, w10
; CHECK-GI-NEXT: sub w9, w9, w10
; CHECK-GI-NEXT: fmov s0, w9
; CHECK-GI-NEXT: sdiv w8, w13, w8
; CHECK-GI-NEXT: lsl w15, w12, #3
; CHECK-GI-NEXT: sub w10, w15, w12
; CHECK-GI-NEXT: sub w10, w11, w10
; CHECK-GI-NEXT: mov v0.s[1], w10
; CHECK-GI-NEXT: lsl w9, w8, #3
; CHECK-GI-NEXT: sub w8, w9, w8
; CHECK-GI-NEXT: sub w8, w13, w8
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: ret
entry:
  %s = srem <3 x i32> %d, <i32 7, i32 7, i32 7>
  ret <3 x i32> %s
}

; srem <3 x i32> by splat(100): SelectionDAG uses the multiply/shift
; expansion; GlobalISel currently scalarizes every lane to sdiv + msub.
define <3 x i32> @sv3i32_100(<3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-LABEL: sv3i32_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: mov w9, v0.s[2]
; CHECK-SD-NEXT: movi v2.2s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: mov w10, #100 // =0x64
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: smull x8, w9, w8
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: asr x8, x8, #37
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37
; CHECK-SD-NEXT: msub w8, w8, w10, w9
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31
; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i32_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: fmov w9, s0
; CHECK-GI-NEXT: mov w8, #100 // =0x64
; CHECK-GI-NEXT: mov s0, v0.s[2]
; CHECK-GI-NEXT: sdiv w10, w9, w8
; CHECK-GI-NEXT: fmov w11, s1
; CHECK-GI-NEXT: fmov w13, s0
; CHECK-GI-NEXT: sdiv w12, w11, w8
; CHECK-GI-NEXT: msub w9, w10, w8, w9
; CHECK-GI-NEXT: fmov s0, w9
; CHECK-GI-NEXT: sdiv w14, w13, w8
; CHECK-GI-NEXT: msub w10, w12, w8, w11
; CHECK-GI-NEXT: mov v0.s[1], w10
; CHECK-GI-NEXT: msub w8, w14, w8, w13
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: ret
entry:
  %s = srem <3 x i32> %d, <i32 100, i32 100, i32 100>
  ret <3 x i32> %s
}

; srem <4 x i32> by splat(7): smull/smull2 + uzp2 form the high halves, then
; add-back, shift plus sign-bit correction, and mls with #7.
define <4 x i32> @sv4i32_7(<4 x i32> %d, <4 x i32> %e) {
; CHECK-SD-LABEL: sv4i32_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
; CHECK-SD-NEXT: movi v3.4s, #7
; CHECK-SD-NEXT: movk w8, #37449, lsl #16
; CHECK-SD-NEXT: dup v1.4s, w8
; CHECK-SD-NEXT: smull2 v2.2d, v0.4s, v1.4s
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT: add v1.4s, v1.4s, v0.4s
; CHECK-SD-NEXT: sshr v2.4s, v1.4s, #2
; CHECK-SD-NEXT: usra v2.4s, v1.4s, #31
; CHECK-SD-NEXT: mls v0.4s, v2.4s, v3.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i32_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI60_0
; CHECK-GI-NEXT: movi v3.4s, #7
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI60_0]
; CHECK-GI-NEXT: smull2 v2.2d, v0.4s, v1.4s
; CHECK-GI-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-GI-NEXT: add v1.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: sshr v2.4s, v1.4s, #2
; CHECK-GI-NEXT: ushr v2.4s, v2.4s, #31
; CHECK-GI-NEXT: ssra v2.4s, v1.4s, #2
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: ret
entry:
  %s = srem <4 x i32> %d, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %s
}

; srem <4 x i32> by splat(100): smull/smull2 + uzp2, shift plus sign-bit
; correction, then mls with #100 in both selectors.
define <4 x i32> @sv4i32_100(<4 x i32> %d, <4 x i32> %e) {
; CHECK-SD-LABEL: sv4i32_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: movi v3.4s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: dup v1.4s, w8
; CHECK-SD-NEXT: smull2 v2.2d, v0.4s, v1.4s
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-SD-NEXT: sshr v2.4s, v1.4s, #5
; CHECK-SD-NEXT: usra v2.4s, v1.4s, #31
; CHECK-SD-NEXT: mls v0.4s, v2.4s, v3.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i32_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI61_0
; CHECK-GI-NEXT: movi v3.4s, #100
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI61_0]
; CHECK-GI-NEXT: smull2 v2.2d, v0.4s, v1.4s
; CHECK-GI-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-GI-NEXT: sshr v2.4s, v1.4s, #5
; CHECK-GI-NEXT: ushr v2.4s, v2.4s, #31
; CHECK-GI-NEXT: ssra v2.4s, v1.4s, #5
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: ret
entry:
  %s = srem <4 x i32> %d, <i32 100, i32 100, i32 100, i32 100>
  ret <4 x i32> %s
}

; urem <2 x i32> by splat(7): umull by a magic constant, subtract/shift
; fix-up, then mls with #7 in both selectors.
define <2 x i32> @uv2i32_7(<2 x i32> %d, <2 x i32> %e) {
; CHECK-SD-LABEL: uv2i32_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
; CHECK-SD-NEXT: movk w8, #9362, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-SD-NEXT: sub v2.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-SD-NEXT: shrn v2.2s, v2.2d, #1
; CHECK-SD-NEXT: add v1.2s, v2.2s, v1.2s
; CHECK-SD-NEXT: movi v2.2s, #7
; CHECK-SD-NEXT: ushr v1.2s, v1.2s, #2
; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i32_7:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI62_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI62_0]
; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: sub v2.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: usra v1.2s, v2.2s, #1
; CHECK-GI-NEXT: movi v2.2s, #7
; CHECK-GI-NEXT: ushr v1.2s, v1.2s, #2
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-GI-NEXT: ret
entry:
  %s = urem <2 x i32> %d, <i32 7, i32 7>
  ret <2 x i32> %s
}

; urem <2 x i32> by splat(100): umull by a magic constant, shift, then mls
; with #100 in both selectors.
define <2 x i32> @uv2i32_100(<2 x i32> %d, <2 x i32> %e) {
; CHECK-SD-LABEL: uv2i32_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: movi v2.2s, #100
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: ushr v1.2d, v1.2d, #37
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i32_100:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI63_0
; CHECK-GI-NEXT: movi v2.2s, #100
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI63_0]
; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: ushr v1.2s, v1.2s, #5
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-GI-NEXT: ret
entry:
  %s = urem <2 x i32> %d, <i32 100, i32 100>
  ret <2 x i32> %s
}

; urem <3 x i32> by a constant (7, per the function name).
define <3 x i32> @uv3i32_7(<3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-LABEL: uv3i32_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
; CHECK-SD-NEXT: mov w9, v0.s[2]
; CHECK-SD-NEXT: movk w8, #9362, lsl #16
; CHECK-SD-NEXT: dup v1.2s, w8
; CHECK-SD-NEXT: umull x8, w9, w8
; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-SD-NEXT: lsr x8, x8, #32
;
CHECK-SD-NEXT: sub w10, w9, w8 ; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-SD-NEXT: add w8, w8, w10, lsr #1 ; CHECK-SD-NEXT: lsr w8, w8, #2 ; CHECK-SD-NEXT: sub v2.2s, v0.2s, v1.2s ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-SD-NEXT: add w8, w9, w8 ; CHECK-SD-NEXT: shrn v2.2s, v2.2d, #1 ; CHECK-SD-NEXT: add v1.2s, v2.2s, v1.2s ; CHECK-SD-NEXT: movi v2.2s, #7 ; CHECK-SD-NEXT: ushr v1.2s, v1.2s, #2 ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: mov v0.s[2], w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv3i32_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: adrp x8, .LCPI64_0 ; CHECK-GI-NEXT: mov w9, #18725 // =0x4925 ; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI64_0] ; CHECK-GI-NEXT: mov w8, v0.s[2] ; CHECK-GI-NEXT: movk w9, #9362, lsl #16 ; CHECK-GI-NEXT: mov w10, #1 // =0x1 ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: umull x8, w8, w9 ; CHECK-GI-NEXT: umull v1.2d, v1.2s, v2.2s ; CHECK-GI-NEXT: lsr x8, x8, #32 ; CHECK-GI-NEXT: ushr v1.2d, v1.2d, #32 ; CHECK-GI-NEXT: mov d2, v1.d[1] ; CHECK-GI-NEXT: fmov x9, d1 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov w9, #2 // =0x2 ; CHECK-GI-NEXT: fmov x11, d2 ; CHECK-GI-NEXT: fmov s2, w10 ; CHECK-GI-NEXT: fmov s3, w9 ; CHECK-GI-NEXT: mov v1.s[1], w11 ; CHECK-GI-NEXT: mov v2.s[1], w10 ; CHECK-GI-NEXT: mov v3.s[1], w9 ; CHECK-GI-NEXT: mov v1.s[2], w8 ; CHECK-GI-NEXT: mov v2.s[2], w10 ; CHECK-GI-NEXT: mov w8, #7 // =0x7 ; CHECK-GI-NEXT: mov v3.s[2], w9 ; CHECK-GI-NEXT: sub v4.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: neg v2.4s, v2.4s ; CHECK-GI-NEXT: ushl v2.4s, v4.4s, v2.4s ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: mov v4.s[1], w8 ; CHECK-GI-NEXT: add v1.4s, v2.4s, v1.4s ; CHECK-GI-NEXT: neg v2.4s, v3.4s ; CHECK-GI-NEXT: ushl v1.4s, v1.4s, v2.4s ; CHECK-GI-NEXT: mov v4.s[2], w8 ; CHECK-GI-NEXT: mls v0.4s, v1.4s, v4.4s ; CHECK-GI-NEXT: ret entry: %s = urem <3 x i32> %d, ret <3 x i32> %s } define <3 x i32> @uv3i32_100(<3 x 
i32> %d, <3 x i32> %e) { ; CHECK-SD-LABEL: uv3i32_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: mov w9, v0.s[2] ; CHECK-SD-NEXT: movi v2.2s, #100 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: mov w10, #100 // =0x64 ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: umull x8, w9, w8 ; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-SD-NEXT: lsr x8, x8, #37 ; CHECK-SD-NEXT: msub w8, w8, w10, w9 ; CHECK-SD-NEXT: ushr v1.2d, v1.2d, #37 ; CHECK-SD-NEXT: xtn v1.2s, v1.2d ; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: mov v0.s[2], w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv3i32_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: adrp x8, .LCPI65_0 ; CHECK-GI-NEXT: mov w9, v0.s[2] ; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI65_0] ; CHECK-GI-NEXT: mov w8, #5 // =0x5 ; CHECK-GI-NEXT: mov w10, #34079 // =0x851f ; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: movk w10, #20971, lsl #16 ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: umull x9, w9, w10 ; CHECK-GI-NEXT: mov v3.s[1], w8 ; CHECK-GI-NEXT: umull v1.2d, v1.2s, v2.2s ; CHECK-GI-NEXT: mov v3.s[2], w8 ; CHECK-GI-NEXT: lsr x8, x9, #32 ; CHECK-GI-NEXT: ushr v1.2d, v1.2d, #32 ; CHECK-GI-NEXT: neg v3.4s, v3.4s ; CHECK-GI-NEXT: mov d2, v1.d[1] ; CHECK-GI-NEXT: fmov x11, d1 ; CHECK-GI-NEXT: fmov s1, w11 ; CHECK-GI-NEXT: fmov x10, d2 ; CHECK-GI-NEXT: mov v1.s[1], w10 ; CHECK-GI-NEXT: mov w10, #100 // =0x64 ; CHECK-GI-NEXT: fmov s2, w10 ; CHECK-GI-NEXT: mov v2.s[1], w10 ; CHECK-GI-NEXT: mov v1.s[2], w8 ; CHECK-GI-NEXT: mov v2.s[2], w10 ; CHECK-GI-NEXT: ushl v1.4s, v1.4s, v3.4s ; CHECK-GI-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-GI-NEXT: ret entry: %s = urem <3 x i32> %d, ret <3 x i32> %s } define <4 x i32> @uv4i32_7(<4 x i32> %d, <4 x i32> %e) { ; CHECK-SD-LABEL: uv4i32_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 ; CHECK-SD-NEXT: movk w8, #9362, lsl #16 ; CHECK-SD-NEXT: dup v1.4s, w8 
; CHECK-SD-NEXT: umull2 v2.2d, v0.4s, v1.4s ; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-SD-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-SD-NEXT: sub v2.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: usra v1.4s, v2.4s, #1 ; CHECK-SD-NEXT: movi v2.4s, #7 ; CHECK-SD-NEXT: ushr v1.4s, v1.4s, #2 ; CHECK-SD-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv4i32_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI66_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI66_0] ; CHECK-GI-NEXT: umull2 v2.2d, v0.4s, v1.4s ; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-GI-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-GI-NEXT: sub v2.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: usra v1.4s, v2.4s, #1 ; CHECK-GI-NEXT: movi v2.4s, #7 ; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #2 ; CHECK-GI-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-GI-NEXT: ret entry: %s = urem <4 x i32> %d, ret <4 x i32> %s } define <4 x i32> @uv4i32_100(<4 x i32> %d, <4 x i32> %e) { ; CHECK-SD-LABEL: uv4i32_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: dup v1.4s, w8 ; CHECK-SD-NEXT: umull2 v2.2d, v0.4s, v1.4s ; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-SD-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-SD-NEXT: movi v2.4s, #100 ; CHECK-SD-NEXT: ushr v1.4s, v1.4s, #5 ; CHECK-SD-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv4i32_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI67_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI67_0] ; CHECK-GI-NEXT: umull2 v2.2d, v0.4s, v1.4s ; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-GI-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-GI-NEXT: movi v2.4s, #100 ; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #5 ; CHECK-GI-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-GI-NEXT: ret entry: %s = urem <4 x i32> %d, ret <4 x i32> %s } define <2 x i64> @sv2i64_7(<2 x i64> %d, <2 x i64> %e) { ; CHECK-SD-LABEL: sv2i64_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov x8, #18725 // 
=0x4925 ; CHECK-SD-NEXT: fmov x10, d0 ; CHECK-SD-NEXT: mov x9, v0.d[1] ; CHECK-SD-NEXT: movk x8, #9362, lsl #16 ; CHECK-SD-NEXT: movk x8, #37449, lsl #32 ; CHECK-SD-NEXT: movk x8, #18724, lsl #48 ; CHECK-SD-NEXT: smulh x11, x10, x8 ; CHECK-SD-NEXT: smulh x8, x9, x8 ; CHECK-SD-NEXT: asr x12, x11, #1 ; CHECK-SD-NEXT: add x11, x12, x11, lsr #63 ; CHECK-SD-NEXT: asr x13, x8, #1 ; CHECK-SD-NEXT: sub x11, x11, x11, lsl #3 ; CHECK-SD-NEXT: add x8, x13, x8, lsr #63 ; CHECK-SD-NEXT: add x10, x10, x11 ; CHECK-SD-NEXT: sub x8, x8, x8, lsl #3 ; CHECK-SD-NEXT: fmov d0, x10 ; CHECK-SD-NEXT: add x8, x9, x8 ; CHECK-SD-NEXT: mov v0.d[1], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv2i64_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: fmov x9, d0 ; CHECK-GI-NEXT: mov w8, #7 // =0x7 ; CHECK-GI-NEXT: mov x10, v0.d[1] ; CHECK-GI-NEXT: sdiv x9, x9, x8 ; CHECK-GI-NEXT: sdiv x8, x10, x8 ; CHECK-GI-NEXT: fmov d1, x9 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: adrp x8, .LCPI68_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI68_0] ; CHECK-GI-NEXT: fmov x11, d2 ; CHECK-GI-NEXT: mov x9, v2.d[1] ; CHECK-GI-NEXT: fmov x10, d1 ; CHECK-GI-NEXT: mov x8, v1.d[1] ; CHECK-GI-NEXT: mul x10, x10, x11 ; CHECK-GI-NEXT: mul x8, x8, x9 ; CHECK-GI-NEXT: fmov d1, x10 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret entry: %s = srem <2 x i64> %d, ret <2 x i64> %s } define <2 x i64> @sv2i64_100(<2 x i64> %d, <2 x i64> %e) { ; CHECK-SD-LABEL: sv2i64_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov x8, #55051 // =0xd70b ; CHECK-SD-NEXT: fmov x10, d0 ; CHECK-SD-NEXT: mov x9, v0.d[1] ; CHECK-SD-NEXT: movk x8, #28835, lsl #16 ; CHECK-SD-NEXT: movk x8, #2621, lsl #32 ; CHECK-SD-NEXT: movk x8, #41943, lsl #48 ; CHECK-SD-NEXT: smulh x11, x10, x8 ; CHECK-SD-NEXT: smulh x8, x9, x8 ; CHECK-SD-NEXT: add x11, x11, x10 ; CHECK-SD-NEXT: asr x12, x11, #6 ; CHECK-SD-NEXT: add x8, x8, x9 ; CHECK-SD-NEXT: add x11, x12, x11, lsr #63 ; CHECK-SD-NEXT: asr x13, x8, #6 ; 
CHECK-SD-NEXT: mov w12, #100 // =0x64 ; CHECK-SD-NEXT: msub x10, x11, x12, x10 ; CHECK-SD-NEXT: add x8, x13, x8, lsr #63 ; CHECK-SD-NEXT: msub x8, x8, x12, x9 ; CHECK-SD-NEXT: fmov d0, x10 ; CHECK-SD-NEXT: mov v0.d[1], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv2i64_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: fmov x9, d0 ; CHECK-GI-NEXT: mov w8, #100 // =0x64 ; CHECK-GI-NEXT: mov x10, v0.d[1] ; CHECK-GI-NEXT: sdiv x9, x9, x8 ; CHECK-GI-NEXT: sdiv x8, x10, x8 ; CHECK-GI-NEXT: fmov d1, x9 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: adrp x8, .LCPI69_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI69_0] ; CHECK-GI-NEXT: fmov x11, d2 ; CHECK-GI-NEXT: mov x9, v2.d[1] ; CHECK-GI-NEXT: fmov x10, d1 ; CHECK-GI-NEXT: mov x8, v1.d[1] ; CHECK-GI-NEXT: mul x10, x10, x11 ; CHECK-GI-NEXT: mul x8, x8, x9 ; CHECK-GI-NEXT: fmov d1, x10 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret entry: %s = srem <2 x i64> %d, ret <2 x i64> %s } define <2 x i64> @uv2i64_7(<2 x i64> %d, <2 x i64> %e) { ; CHECK-SD-LABEL: uv2i64_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov x8, #9363 // =0x2493 ; CHECK-SD-NEXT: fmov x10, d0 ; CHECK-SD-NEXT: mov x9, v0.d[1] ; CHECK-SD-NEXT: movk x8, #37449, lsl #16 ; CHECK-SD-NEXT: movk x8, #18724, lsl #32 ; CHECK-SD-NEXT: movk x8, #9362, lsl #48 ; CHECK-SD-NEXT: umulh x11, x10, x8 ; CHECK-SD-NEXT: umulh x8, x9, x8 ; CHECK-SD-NEXT: sub x12, x10, x11 ; CHECK-SD-NEXT: add x11, x11, x12, lsr #1 ; CHECK-SD-NEXT: sub x12, x9, x8 ; CHECK-SD-NEXT: lsr x11, x11, #2 ; CHECK-SD-NEXT: add x8, x8, x12, lsr #1 ; CHECK-SD-NEXT: sub x11, x11, x11, lsl #3 ; CHECK-SD-NEXT: lsr x8, x8, #2 ; CHECK-SD-NEXT: add x10, x10, x11 ; CHECK-SD-NEXT: sub x8, x8, x8, lsl #3 ; CHECK-SD-NEXT: fmov d0, x10 ; CHECK-SD-NEXT: add x8, x9, x8 ; CHECK-SD-NEXT: mov v0.d[1], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv2i64_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov x8, #9363 // =0x2493 ; CHECK-GI-NEXT: fmov x10, d0 ; 
CHECK-GI-NEXT: mov x9, v0.d[1] ; CHECK-GI-NEXT: movk x8, #37449, lsl #16 ; CHECK-GI-NEXT: movk x8, #18724, lsl #32 ; CHECK-GI-NEXT: movk x8, #9362, lsl #48 ; CHECK-GI-NEXT: umulh x10, x10, x8 ; CHECK-GI-NEXT: umulh x8, x9, x8 ; CHECK-GI-NEXT: fmov d1, x10 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: adrp x8, .LCPI70_0 ; CHECK-GI-NEXT: sub v2.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: usra v1.2d, v2.2d, #1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI70_0] ; CHECK-GI-NEXT: fmov x11, d2 ; CHECK-GI-NEXT: mov x9, v2.d[1] ; CHECK-GI-NEXT: ushr v1.2d, v1.2d, #2 ; CHECK-GI-NEXT: fmov x10, d1 ; CHECK-GI-NEXT: mov x8, v1.d[1] ; CHECK-GI-NEXT: mul x10, x10, x11 ; CHECK-GI-NEXT: mul x8, x8, x9 ; CHECK-GI-NEXT: fmov d1, x10 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret entry: %s = urem <2 x i64> %d, ret <2 x i64> %s } define <2 x i64> @uv2i64_100(<2 x i64> %d, <2 x i64> %e) { ; CHECK-SD-LABEL: uv2i64_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fmov x10, d0 ; CHECK-SD-NEXT: mov x8, #62915 // =0xf5c3 ; CHECK-SD-NEXT: mov x9, v0.d[1] ; CHECK-SD-NEXT: movk x8, #23592, lsl #16 ; CHECK-SD-NEXT: movk x8, #49807, lsl #32 ; CHECK-SD-NEXT: lsr x11, x10, #2 ; CHECK-SD-NEXT: movk x8, #10485, lsl #48 ; CHECK-SD-NEXT: lsr x12, x9, #2 ; CHECK-SD-NEXT: umulh x11, x11, x8 ; CHECK-SD-NEXT: umulh x8, x12, x8 ; CHECK-SD-NEXT: mov w12, #100 // =0x64 ; CHECK-SD-NEXT: lsr x11, x11, #2 ; CHECK-SD-NEXT: msub x10, x11, x12, x10 ; CHECK-SD-NEXT: lsr x8, x8, #2 ; CHECK-SD-NEXT: msub x8, x8, x12, x9 ; CHECK-SD-NEXT: fmov d0, x10 ; CHECK-SD-NEXT: mov v0.d[1], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: uv2i64_100: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushr v1.2d, v0.2d, #2 ; CHECK-GI-NEXT: mov x8, #62915 // =0xf5c3 ; CHECK-GI-NEXT: movk x8, #23592, lsl #16 ; CHECK-GI-NEXT: movk x8, #49807, lsl #32 ; CHECK-GI-NEXT: fmov x10, d1 ; CHECK-GI-NEXT: movk x8, #10485, lsl #48 ; CHECK-GI-NEXT: mov x9, v1.d[1] ; CHECK-GI-NEXT: umulh x10, x10, x8 ; 
CHECK-GI-NEXT: umulh x8, x9, x8 ; CHECK-GI-NEXT: fmov d1, x10 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: adrp x8, .LCPI71_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI71_0] ; CHECK-GI-NEXT: fmov x11, d2 ; CHECK-GI-NEXT: mov x9, v2.d[1] ; CHECK-GI-NEXT: ushr v1.2d, v1.2d, #2 ; CHECK-GI-NEXT: fmov x10, d1 ; CHECK-GI-NEXT: mov x8, v1.d[1] ; CHECK-GI-NEXT: mul x10, x10, x11 ; CHECK-GI-NEXT: mul x8, x8, x9 ; CHECK-GI-NEXT: fmov d1, x10 ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: ret entry: %s = urem <2 x i64> %d, ret <2 x i64> %s }

; Signed remainder of <2 x i128> by a splat of 7. Both selectors scalarize the
; operation into two calls to the __modti3 libcall, one per vector lane.
define <2 x i128> @sv2i128_7(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: sv2i128_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w30, -48
; CHECK-SD-NEXT:    mov x19, x3
; CHECK-SD-NEXT:    mov x20, x2
; CHECK-SD-NEXT:    mov w2, #7 // =0x7
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x19
; CHECK-SD-NEXT:    mov w2, #7 // =0x7
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x2, x0
; CHECK-SD-NEXT:    mov x3, x1
; CHECK-SD-NEXT:    mov x0, x21
; CHECK-SD-NEXT:    mov x1, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv2i128_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w30, -48
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov w2, #7 // =0x7
; CHECK-GI-NEXT:    mov x3, xzr
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x21, x0
; CHECK-GI-NEXT:    mov x22, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov w2, #7 // =0x7
; CHECK-GI-NEXT:    mov x3, xzr
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x2, x0
; CHECK-GI-NEXT:    mov x3, x1
; CHECK-GI-NEXT:    mov x0, x21
; CHECK-GI-NEXT:    mov x1, x22
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <2 x i128> %d, <i128 7, i128 7>
  ret <2 x i128> %s
}

; Signed remainder of <2 x i128> by a splat of 100. Both selectors scalarize
; the operation into two calls to the __modti3 libcall, one per vector lane.
define <2 x i128> @sv2i128_100(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: sv2i128_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w30, -48
; CHECK-SD-NEXT:    mov x19, x3
; CHECK-SD-NEXT:    mov x20, x2
; CHECK-SD-NEXT:    mov w2, #100 // =0x64
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x19
; CHECK-SD-NEXT:    mov w2, #100 // =0x64
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x2, x0
; CHECK-SD-NEXT:    mov x3, x1
; CHECK-SD-NEXT:    mov x0, x21
; CHECK-SD-NEXT:    mov x1, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv2i128_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w30, -48
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov w2, #100 // =0x64
; CHECK-GI-NEXT:    mov x3, xzr
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x21, x0
; CHECK-GI-NEXT:    mov x22, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov w2, #100 // =0x64
; CHECK-GI-NEXT:    mov x3, xzr
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x2, x0
; CHECK-GI-NEXT:    mov x3, x1
; CHECK-GI-NEXT:    mov x0, x21
; CHECK-GI-NEXT:    mov x1, x22
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <2 x i128> %d, <i128 100, i128 100>
  ret <2 x i128> %s
}

; Unsigned remainder of <2 x i128> by a splat of 7. SelectionDAG calls the
; __umodti3 libcall once per lane; GlobalISel emits an inline mul/umulh
; sequence with no libcall.
define <2 x i128> @uv2i128_7(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: uv2i128_7:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w30, -48
; CHECK-SD-NEXT:    mov x19, x3
; CHECK-SD-NEXT:    mov x20, x2
; CHECK-SD-NEXT:    mov w2, #7 // =0x7
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x19
; CHECK-SD-NEXT:    mov w2, #7 // =0x7
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x2, x0
; CHECK-SD-NEXT:    mov x3, x1
; CHECK-SD-NEXT:    mov x0, x21
; CHECK-SD-NEXT:    mov x1, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uv2i128_7:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov x10, #18725 // =0x4925
; CHECK-GI-NEXT:    mov x8, #9362 // =0x2492
; CHECK-GI-NEXT:    sub x4, x0, x0
; CHECK-GI-NEXT:    movk x10, #9362, lsl #16
; CHECK-GI-NEXT:    movk x8, #37449, lsl #16
; CHECK-GI-NEXT:    umulh x18, x0, xzr
; CHECK-GI-NEXT:    movk x10, #37449, lsl #32
; CHECK-GI-NEXT:    movk x8, #18724, lsl #32
; CHECK-GI-NEXT:    movk x10, #18724, lsl #48
; CHECK-GI-NEXT:    movk x8, #9362, lsl #48
; CHECK-GI-NEXT:    mul x11, x1, x10
; CHECK-GI-NEXT:    mul x12, x0, x8
; CHECK-GI-NEXT:    umulh x13, x0, x10
; CHECK-GI-NEXT:    mul x14, x1, x8
; CHECK-GI-NEXT:    adds x11, x11, x12
; CHECK-GI-NEXT:    umulh x15, x1, x10
; CHECK-GI-NEXT:    cset w12, hs
; CHECK-GI-NEXT:    cmn x11, x13
; CHECK-GI-NEXT:    and x11, x12, #0x1
; CHECK-GI-NEXT:    umulh x16, x0, x8
; CHECK-GI-NEXT:    cset w12, hs
; CHECK-GI-NEXT:    add x14, x14, x4
; CHECK-GI-NEXT:    and x12, x12, #0x1
; CHECK-GI-NEXT:    and x4, xzr, #0x1
; CHECK-GI-NEXT:    mul x13, x3, x10
; CHECK-GI-NEXT:    add x11, x11, x12
; CHECK-GI-NEXT:    and x12, xzr, #0x1
; CHECK-GI-NEXT:    adds x14, x14, x15
; CHECK-GI-NEXT:    add x12, x12, x4
; CHECK-GI-NEXT:    mul x5, x2, x8
; CHECK-GI-NEXT:    cset w4, hs
; CHECK-GI-NEXT:    adds x14, x14, x16
; CHECK-GI-NEXT:    and x16, x4, #0x1
; CHECK-GI-NEXT:    umulh x9, xzr, x10
; CHECK-GI-NEXT:    cset w4, hs
; CHECK-GI-NEXT:    adds x11, x14, x11
; CHECK-GI-NEXT:    add x12, x12, x16
; CHECK-GI-NEXT:    and x16, x4, #0x1
; CHECK-GI-NEXT:    cset w14, hs
; CHECK-GI-NEXT:    umulh x17, x1, x8
; CHECK-GI-NEXT:    add x12, x12, x16
; CHECK-GI-NEXT:    adds x13, x13, x5
; CHECK-GI-NEXT:    umulh x15, x2, x10
; CHECK-GI-NEXT:    cset w4, hs
; CHECK-GI-NEXT:    and x16, x4, #0x1
; CHECK-GI-NEXT:    mul x6, x3, x8
; CHECK-GI-NEXT:    umulh x10, x3, x10
; CHECK-GI-NEXT:    cmn x13, x15
; CHECK-GI-NEXT:    and x13, x14, #0x1
; CHECK-GI-NEXT:    add x14, x9, x17
; CHECK-GI-NEXT:    umulh x15, x2, x8
; CHECK-GI-NEXT:    add x12, x12, x13
; CHECK-GI-NEXT:    add x13, x14, x18
; CHECK-GI-NEXT:    cset w14, hs
; CHECK-GI-NEXT:    sub x17, x2, x2
; CHECK-GI-NEXT:    and x18, xzr, #0x1
; CHECK-GI-NEXT:    and x14, x14, #0x1
; CHECK-GI-NEXT:    umulh x8, x3, x8
; CHECK-GI-NEXT:    add x12, x13, x12
; CHECK-GI-NEXT:    add x14, x16, x14
; CHECK-GI-NEXT:    add x16, x6, x17
; CHECK-GI-NEXT:    and x17, xzr, #0x1
; CHECK-GI-NEXT:    adds x10, x16, x10
; CHECK-GI-NEXT:    add x17, x17, x18
; CHECK-GI-NEXT:    cset w16, hs
; CHECK-GI-NEXT:    adds x10, x10, x15
; CHECK-GI-NEXT:    umulh x15, x2, xzr
; CHECK-GI-NEXT:    and x16, x16, #0x1
; CHECK-GI-NEXT:    cset w18, hs
; CHECK-GI-NEXT:    adds x10, x10, x14
; CHECK-GI-NEXT:    add x16, x17, x16
; CHECK-GI-NEXT:    and x17, x18, #0x1
; CHECK-GI-NEXT:    cset w14, hs
; CHECK-GI-NEXT:    add x13, x16, x17
; CHECK-GI-NEXT:    and x14, x14, #0x1
; CHECK-GI-NEXT:    add x8, x9, x8
; CHECK-GI-NEXT:    subs x9, x0, x11
; CHECK-GI-NEXT:    add x13, x13, x14
; CHECK-GI-NEXT:    add x8, x8, x15
; CHECK-GI-NEXT:    sbc x14, x1, x12
; CHECK-GI-NEXT:    add x8, x8, x13
; CHECK-GI-NEXT:    subs x13, x2, x10
; CHECK-GI-NEXT:    lsl x15, x14, #63
; CHECK-GI-NEXT:    sbc x16, x3, x8
; CHECK-GI-NEXT:    lsr x14, x14, #1
; CHECK-GI-NEXT:    orr x9, x15, x9, lsr #1
; CHECK-GI-NEXT:    lsl x15, x16, #63
; CHECK-GI-NEXT:    orr x13, x15, x13, lsr #1
; CHECK-GI-NEXT:    adds x9, x9, x11
; CHECK-GI-NEXT:    lsr x11, x16, #1
; CHECK-GI-NEXT:    adc x12, x14, x12
; CHECK-GI-NEXT:    adds x10, x13, x10
; CHECK-GI-NEXT:    lsl x13, x12, #62
; CHECK-GI-NEXT:    lsr x12, x12, #2
; CHECK-GI-NEXT:    adc x8, x11, x8
; CHECK-GI-NEXT:    lsl x11, x8, #62
; CHECK-GI-NEXT:    orr x9, x13, x9, lsr #2
; CHECK-GI-NEXT:    mov w13, #7 // =0x7
; CHECK-GI-NEXT:    lsr x8, x8, #2
; CHECK-GI-NEXT:    lsl x14, x12, #3
; CHECK-GI-NEXT:    orr x10, x11, x10, lsr #2
; CHECK-GI-NEXT:    umulh x11, x9, x13
; CHECK-GI-NEXT:    lsl x15, x9, #3
; CHECK-GI-NEXT:    sub x12, x14, x12
; CHECK-GI-NEXT:    lsl x16, x8, #3
; CHECK-GI-NEXT:    umulh x13, x10, x13
; CHECK-GI-NEXT:    lsl x14, x10, #3
; CHECK-GI-NEXT:    sub x9, x15, x9
; CHECK-GI-NEXT:    sub x8, x16, x8
; CHECK-GI-NEXT:    subs x0, x0, x9
; CHECK-GI-NEXT:    sub x10, x14, x10
; CHECK-GI-NEXT:    add x11, x12, x11
; CHECK-GI-NEXT:    sbc x1, x1, x11
; CHECK-GI-NEXT:    subs x2, x2, x10
; CHECK-GI-NEXT:    add x8, x8, x13
; CHECK-GI-NEXT:    sbc x3, x3, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem <2 x i128> %d, <i128 7, i128 7>
  ret <2 x i128> %s
}

; Unsigned remainder of <2 x i128> by a splat of 100. SelectionDAG calls the
; __umodti3 libcall once per lane; GlobalISel emits an inline mul/umulh
; sequence with no libcall.
define <2 x i128> @uv2i128_100(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: uv2i128_100:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w30, -48
; CHECK-SD-NEXT:    mov x19, x3
; CHECK-SD-NEXT:    mov x20, x2
; CHECK-SD-NEXT:    mov w2, #100 // =0x64
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x19
; CHECK-SD-NEXT:    mov w2, #100 // =0x64
; CHECK-SD-NEXT:    mov x3, xzr
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x2, x0
; CHECK-SD-NEXT:    mov x3, x1
; CHECK-SD-NEXT:    mov x0, x21
; CHECK-SD-NEXT:    mov x1, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uv2i128_100:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov x10, #23593 // =0x5c29
; CHECK-GI-NEXT:    mov x8, #62914 // =0xf5c2
; CHECK-GI-NEXT:    sub x18, x0, x0
; CHECK-GI-NEXT:    movk x10, #49807, lsl #16
; CHECK-GI-NEXT:    movk x8, #23592, lsl #16
; CHECK-GI-NEXT:    movk x10, #10485, lsl #32
; CHECK-GI-NEXT:    movk x8, #49807, lsl #32
; CHECK-GI-NEXT:    movk x10, #36700, lsl #48
; CHECK-GI-NEXT:    movk x8, #10485, lsl #48
; CHECK-GI-NEXT:    mul x11, x1, x10
; CHECK-GI-NEXT:    mul x12, x0, x8
; CHECK-GI-NEXT:    umulh x13, x0, x10
; CHECK-GI-NEXT:    mul x14, x1, x8
; CHECK-GI-NEXT:    adds x11, x11, x12
; CHECK-GI-NEXT:    umulh x15, x1, x10
; CHECK-GI-NEXT:    cset w12, hs
; CHECK-GI-NEXT:    cmn x11, x13
; CHECK-GI-NEXT:    and x11, x12, #0x1
; CHECK-GI-NEXT:    umulh x16, x0, x8
; CHECK-GI-NEXT:    cset w12, hs
; CHECK-GI-NEXT:    and x12, x12, #0x1
; CHECK-GI-NEXT:    add x14, x14, x18
; CHECK-GI-NEXT:    add x11, x11, x12
; CHECK-GI-NEXT:    and x12, xzr, #0x1
; CHECK-GI-NEXT:    umulh x9, xzr, x10
; CHECK-GI-NEXT:    adds x14, x14, x15
; CHECK-GI-NEXT:    and x15, xzr, #0x1
; CHECK-GI-NEXT:    umulh x17, x1, x8
; CHECK-GI-NEXT:    cset w4, hs
; CHECK-GI-NEXT:    add x15, x12, x15
; CHECK-GI-NEXT:    adds x12, x14, x16
; CHECK-GI-NEXT:    and x4, x4, #0x1
; CHECK-GI-NEXT:    mul x18, x3, x10
; CHECK-GI-NEXT:    cset w14, hs
; CHECK-GI-NEXT:    adds x12, x12, x11
; CHECK-GI-NEXT:    add x11, x15, x4
; CHECK-GI-NEXT:    and x14, x14, #0x1
; CHECK-GI-NEXT:    cset w15, hs
; CHECK-GI-NEXT:    mul x5, x2, x8
; CHECK-GI-NEXT:    add x11, x11, x14
; CHECK-GI-NEXT:    and x14, x15, #0x1
; CHECK-GI-NEXT:    add x17, x9, x17
; CHECK-GI-NEXT:    add x14, x11, x14
; CHECK-GI-NEXT:    mov w11, #100 // =0x64
; CHECK-GI-NEXT:    umulh x13, x0, xzr
; CHECK-GI-NEXT:    umulh x16, x2, x10
; CHECK-GI-NEXT:    adds x18, x18, x5
; CHECK-GI-NEXT:    mul x15, x3, x8
; CHECK-GI-NEXT:    add x13, x17, x13
; CHECK-GI-NEXT:    cset w17, hs
; CHECK-GI-NEXT:    umulh x10, x3, x10
; CHECK-GI-NEXT:    add x13, x13, x14
; CHECK-GI-NEXT:    and x17, x17, #0x1
; CHECK-GI-NEXT:    cmn x18, x16
; CHECK-GI-NEXT:    sub x18, x2, x2
; CHECK-GI-NEXT:    umulh x16, x2, x8
; CHECK-GI-NEXT:    cset w14, hs
; CHECK-GI-NEXT:    and x14, x14, #0x1
; CHECK-GI-NEXT:    add x15, x15, x18
; CHECK-GI-NEXT:    and x18, xzr, #0x1
; CHECK-GI-NEXT:    add x14, x17, x14
; CHECK-GI-NEXT:    umulh x8, x3, x8
; CHECK-GI-NEXT:    and x17, xzr, #0x1
; CHECK-GI-NEXT:    adds x10, x15, x10
; CHECK-GI-NEXT:    add x15, x17, x18
; CHECK-GI-NEXT:    cset w17, hs
; CHECK-GI-NEXT:    umulh x18, x2, xzr
; CHECK-GI-NEXT:    and x17, x17, #0x1
; CHECK-GI-NEXT:    adds x10, x10, x16
; CHECK-GI-NEXT:    lsl x16, x13, #60
; CHECK-GI-NEXT:    add x15, x15, x17
; CHECK-GI-NEXT:    cset w17, hs
; CHECK-GI-NEXT:    adds x10, x10, x14
; CHECK-GI-NEXT:    and x14, x17, #0x1
; CHECK-GI-NEXT:    cset w17, hs
; CHECK-GI-NEXT:    add x8, x9, x8
; CHECK-GI-NEXT:    add x14, x15, x14
; CHECK-GI-NEXT:    and x15, x17, #0x1
; CHECK-GI-NEXT:    orr x12, x16, x12, lsr #4
; CHECK-GI-NEXT:    add x9, x14, x15
; CHECK-GI-NEXT:    add x8, x8, x18
; CHECK-GI-NEXT:    add x8, x8, x9
; CHECK-GI-NEXT:    lsr x9, x13, #4
; CHECK-GI-NEXT:    umulh x14, x12, x11
; CHECK-GI-NEXT:    lsl x13, x8, #60
; CHECK-GI-NEXT:    lsr x8, x8, #4
; CHECK-GI-NEXT:    mul x12, x12, x11
; CHECK-GI-NEXT:    orr x10, x13, x10, lsr #4
; CHECK-GI-NEXT:    madd x9, x9, x11, x14
; CHECK-GI-NEXT:    umulh x13, x10, x11
; CHECK-GI-NEXT:    subs x0, x0, x12
; CHECK-GI-NEXT:    mul x10, x10, x11
; CHECK-GI-NEXT:    sbc x1, x1, x9
; CHECK-GI-NEXT:    madd x8, x8, x11, x13
; CHECK-GI-NEXT:    subs x2, x2, x10
; CHECK-GI-NEXT:    sbc x3, x3, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = urem <2 x i128> %d, <i128 100, i128 100>
  ret <2 x i128> %s
}