; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Codegen test for truncating fixed-length vectors of i16 to vectors of i8.
; The first two llc invocations above (+sve2 with -force-streaming-compatible,
; and +sme with -force-streaming) are verified against the default CHECK
; prefix; the third invocation (-force-streaming-compatible with no -mattr)
; is verified against the NONEON-NOSVE prefix.
; The expected-assembly comments inside each function come from the script
; named on the first line; presumably they should be regenerated with that
; script after any change to the IR or the llc flags, not edited by hand.

target triple = "aarch64-unknown-linux-gnu"

;
; truncate i16 -> i8
;

; Loads a <16 x i16> vector from %in, truncates every element to i8 and
; returns the resulting <16 x i8> vector.
define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i16_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
; CHECK-NEXT:    uzp1 z2.b, z1.b, z1.b
; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #6]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #4]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %a = load <16 x i16>, ptr %in
  %b = trunc <16 x i16> %a to <16 x i8>
  ret <16 x i8> %b
}

; Loads a <32 x i16> vector from %in, truncates every element to i8, adds the
; truncated vector to itself and stores the <32 x i8> result to %out.
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i16_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0, #32]
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    ldp q3, q2, [x0]
; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
; CHECK-NEXT:    add z1.b, z2.b, z2.b
; CHECK-NEXT:    add z0.b, z0.b, z0.b
; CHECK-NEXT:    stp q0, q1, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #208
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0, #32]
; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #112] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #128] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #144] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #160] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #16]
; NONEON-NOSVE-NEXT:    stp q2, q0, [sp, #48]
; NONEON-NOSVE-NEXT:    ldrh w25, [sp, #28]
; NONEON-NOSVE-NEXT:    ldrh w26, [sp, #30]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #64]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #66]
; NONEON-NOSVE-NEXT:    ldrh w29, [sp, #52]
; NONEON-NOSVE-NEXT:    ldrh w27, [sp, #48]
; NONEON-NOSVE-NEXT:    ldrh w28, [sp, #50]
; NONEON-NOSVE-NEXT:    ldrh w23, [sp, #24]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #54]
; NONEON-NOSVE-NEXT:    ldrh w24, [sp, #26]
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #176] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w21, [sp, #20]
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    add w9, w9, w9
; NONEON-NOSVE-NEXT:    ldrh w22, [sp, #22]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
; NONEON-NOSVE-NEXT:    add w8, w29, w29
; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #44]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #91]
; NONEON-NOSVE-NEXT:    add w9, w28, w28
; NONEON-NOSVE-NEXT:    ldrh w7, [sp, #46]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
; NONEON-NOSVE-NEXT:    add w8, w27, w27
; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #40]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #89]
; NONEON-NOSVE-NEXT:    add w9, w26, w26
; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #42]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
; NONEON-NOSVE-NEXT:    add w8, w25, w25
; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #36]
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #192] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w19, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrh w20, [sp, #18]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #87]
; NONEON-NOSVE-NEXT:    add w9, w24, w24
; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #38]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
; NONEON-NOSVE-NEXT:    add w8, w23, w23
; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #60]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #85]
; NONEON-NOSVE-NEXT:    add w9, w22, w22
; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #62]
; NONEON-NOSVE-NEXT:    add w6, w12, w12
; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
; NONEON-NOSVE-NEXT:    add w8, w21, w21
; NONEON-NOSVE-NEXT:    add w5, w13, w13
; NONEON-NOSVE-NEXT:    strb w9, [sp, #83]
; NONEON-NOSVE-NEXT:    add w9, w20, w20
; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
; NONEON-NOSVE-NEXT:    add w8, w19, w19
; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #32]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #81]
; NONEON-NOSVE-NEXT:    add w9, w7, w7
; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #34]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
; NONEON-NOSVE-NEXT:    add w8, w4, w4
; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #76]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #111]
; NONEON-NOSVE-NEXT:    add w9, w3, w3
; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #78]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #110]
; NONEON-NOSVE-NEXT:    add w8, w2, w2
; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #72]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #109]
; NONEON-NOSVE-NEXT:    add w9, w0, w0
; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #74]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #108]
; NONEON-NOSVE-NEXT:    add w8, w18, w18
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #68]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #107]
; NONEON-NOSVE-NEXT:    add w9, w17, w17
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #70]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #106]
; NONEON-NOSVE-NEXT:    add w8, w16, w16
; NONEON-NOSVE-NEXT:    ldrh w30, [sp, #58]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #105]
; NONEON-NOSVE-NEXT:    add w9, w15, w15
; NONEON-NOSVE-NEXT:    strb w8, [sp, #104]
; NONEON-NOSVE-NEXT:    add w8, w14, w14
; NONEON-NOSVE-NEXT:    strb w9, [sp, #103]
; NONEON-NOSVE-NEXT:    add w9, w13, w13
; NONEON-NOSVE-NEXT:    strb w8, [sp, #102]
; NONEON-NOSVE-NEXT:    add w8, w12, w12
; NONEON-NOSVE-NEXT:    strb w9, [sp, #101]
; NONEON-NOSVE-NEXT:    add w9, w11, w11
; NONEON-NOSVE-NEXT:    strb w8, [sp, #100]
; NONEON-NOSVE-NEXT:    add w8, w10, w10
; NONEON-NOSVE-NEXT:    strb w9, [sp, #99]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w8, [sp, #98]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w5, [sp, #95]
; NONEON-NOSVE-NEXT:    add w5, w30, w30
; NONEON-NOSVE-NEXT:    add w9, w9, w9
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w6, [sp, #94]
; NONEON-NOSVE-NEXT:    strb w5, [sp, #93]
; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #192] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w9, [sp, #97]
; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #176] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w8, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #160] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #80]
; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #144] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #128] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #112] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #208
; NONEON-NOSVE-NEXT:    ret
  %a = load <32 x i16>, ptr %in
  %b = trunc <32 x i16> %a to <32 x i8>
  %c = add <32 x i8> %b, %b
  store <32 x i8> %c, ptr %out
  ret void
}

; Loads a <64 x i16> vector from %in, truncates every element to i8, adds the
; truncated vector to itself and stores the <64 x i8> result to %out.
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i16_v64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0, #64]
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    ldp q2, q3, [x0, #96]
; CHECK-NEXT:    ldp q4, q5, [x0]
; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
; CHECK-NEXT:    ldp q1, q0, [x0, #32]
; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
; CHECK-NEXT:    uzp1 z3.b, z5.b, z5.b
; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT:    splice z0.b, p0, { z6.b, z7.b }
; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
; CHECK-NEXT:    splice z3.b, p0, { z4.b, z5.b }
; CHECK-NEXT:    add z0.b, z0.b, z0.b
; CHECK-NEXT:    add z1.b, z1.b, z1.b
; CHECK-NEXT:    add z2.b, z2.b, z2.b
; CHECK-NEXT:    add z3.b, z3.b, z3.b
; CHECK-NEXT:    stp q0, q1, [x1, #32]
; CHECK-NEXT:    stp q2, q3, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: trunc_v64i16_v64i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #448
; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #416] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT:    str x1, [sp, #152] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #432] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #400] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #224]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #238]
; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #256]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #232]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #272]
; NONEON-NOSVE-NEXT:    stp q5, q7, [sp, #160]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #230]
; NONEON-NOSVE-NEXT:    add w21, w8, w8
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #274]
; NONEON-NOSVE-NEXT:    stp q6, q0, [sp, #192]
; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #228]
; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #226]
; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #224]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #276]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #278]
; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #270]
; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #268]
; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #266]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #280]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #282]
; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #264]
; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #262]
; NONEON-NOSVE-NEXT:    ldrh w1, [sp, #260]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #284]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #286]
; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #258]
; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #256]
; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #254]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #208]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #210]
; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #252]
; NONEON-NOSVE-NEXT:    ldrh w6, [sp, #250]
; NONEON-NOSVE-NEXT:    ldrh w7, [sp, #248]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #212]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #214]
; NONEON-NOSVE-NEXT:    ldrh w19, [sp, #246]
; NONEON-NOSVE-NEXT:    ldrh w20, [sp, #244]
; NONEON-NOSVE-NEXT:    ldrh w22, [sp, #242]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #216]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #218]
; NONEON-NOSVE-NEXT:    ldrh w23, [sp, #240]
; NONEON-NOSVE-NEXT:    ldrh w24, [sp, #174]
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #384] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #220]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #222]
; NONEON-NOSVE-NEXT:    ldrh w25, [sp, #172]
; NONEON-NOSVE-NEXT:    ldrh w26, [sp, #170]
; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #368] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #176]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #178]
; NONEON-NOSVE-NEXT:    ldrh w27, [sp, #168]
; NONEON-NOSVE-NEXT:    ldrh w28, [sp, #166]
; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #352] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #180]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #182]
; NONEON-NOSVE-NEXT:    ldrh w29, [sp, #164]
; NONEON-NOSVE-NEXT:    ldrh w30, [sp, #162]
; NONEON-NOSVE-NEXT:    strb w21, [sp, #335]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #184]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #186]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #188]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #190]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #192]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #194]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #196]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #198]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #200]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #202]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #204]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #206]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #160]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #236]
; NONEON-NOSVE-NEXT:    add w9, w9, w9
; NONEON-NOSVE-NEXT:    str w8, [sp, #20] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #234]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #334]
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #333]
; NONEON-NOSVE-NEXT:    add w8, w10, w10
; NONEON-NOSVE-NEXT:    strb w8, [sp, #332]
; NONEON-NOSVE-NEXT:    add w8, w11, w11
; NONEON-NOSVE-NEXT:    strb w8, [sp, #331]
; NONEON-NOSVE-NEXT:    add w8, w12, w12
; NONEON-NOSVE-NEXT:    strb w8, [sp, #330]
; NONEON-NOSVE-NEXT:    add w8, w13, w13
; NONEON-NOSVE-NEXT:    strb w8, [sp, #329]
; NONEON-NOSVE-NEXT:    add w8, w14, w14
; NONEON-NOSVE-NEXT:    strb w8, [sp, #328]
; NONEON-NOSVE-NEXT:    add w8, w15, w15
; NONEON-NOSVE-NEXT:    strb w8, [sp, #327]
; NONEON-NOSVE-NEXT:    add w8, w16, w16
; NONEON-NOSVE-NEXT:    strb w8, [sp, #326]
; NONEON-NOSVE-NEXT:    add w8, w17, w17
; NONEON-NOSVE-NEXT:    strb w8, [sp, #325]
; NONEON-NOSVE-NEXT:    add w8, w18, w18
; NONEON-NOSVE-NEXT:    strb w8, [sp, #324]
; NONEON-NOSVE-NEXT:    add w8, w0, w0
; NONEON-NOSVE-NEXT:    strb w8, [sp, #323]
; NONEON-NOSVE-NEXT:    add w8, w1, w1
; NONEON-NOSVE-NEXT:    strb w8, [sp, #322]
; NONEON-NOSVE-NEXT:    add w8, w2, w2
; NONEON-NOSVE-NEXT:    strb w8, [sp, #321]
; NONEON-NOSVE-NEXT:    add w8, w3, w3
; NONEON-NOSVE-NEXT:    strb w8, [sp, #320]
; NONEON-NOSVE-NEXT:    add w8, w4, w4
; NONEON-NOSVE-NEXT:    strb w8, [sp, #319]
; NONEON-NOSVE-NEXT:    add w8, w5, w5
; NONEON-NOSVE-NEXT:    strb w8, [sp, #318]
; NONEON-NOSVE-NEXT:    add w8, w6, w6
; NONEON-NOSVE-NEXT:    strb w8, [sp, #317]
; NONEON-NOSVE-NEXT:    add w8, w7, w7
; NONEON-NOSVE-NEXT:    strb w8, [sp, #316]
; NONEON-NOSVE-NEXT:    add w8, w19, w19
; NONEON-NOSVE-NEXT:    strb w8, [sp, #315]
; NONEON-NOSVE-NEXT:    add w8, w20, w20
; NONEON-NOSVE-NEXT:    strb w8, [sp, #314]
; NONEON-NOSVE-NEXT:    add w8, w22, w22
; NONEON-NOSVE-NEXT:    strb w8, [sp, #313]
; NONEON-NOSVE-NEXT:    add w8, w23, w23
; NONEON-NOSVE-NEXT:    strb w8, [sp, #312]
; NONEON-NOSVE-NEXT:    add w8, w24, w24
; NONEON-NOSVE-NEXT:    strb w8, [sp, #311]
; NONEON-NOSVE-NEXT:    add w8, w25, w25
; NONEON-NOSVE-NEXT:    strb w8, [sp, #310]
; NONEON-NOSVE-NEXT:    add w8, w26, w26
; NONEON-NOSVE-NEXT:    strb w8, [sp, #309]
; NONEON-NOSVE-NEXT:    add w8, w27, w27
; NONEON-NOSVE-NEXT:    strb w8, [sp, #308]
; NONEON-NOSVE-NEXT:    add w8, w28, w28
; NONEON-NOSVE-NEXT:    strb w8, [sp, #307]
; NONEON-NOSVE-NEXT:    add w8, w29, w29
; NONEON-NOSVE-NEXT:    strb w8, [sp, #306]
; NONEON-NOSVE-NEXT:    add w8, w30, w30
; NONEON-NOSVE-NEXT:    strb w8, [sp, #305]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #432] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #416] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w8, [sp, #304]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #400] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #384] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w8, [sp, #303]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w8, [sp, #302]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #301]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #300]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #299]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #298]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #297]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #296]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #295]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #294]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #64] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #293]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #68] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #292]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #72] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #291]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #76] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #290]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #289]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #84] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #288]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #88] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp q1, q3, [sp, #288]
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #351]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #92] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #350]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #349]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #100] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #348]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #104] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #347]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #108] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #346]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #112] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #345]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #116] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #344]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #120] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #343]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #124] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #342]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #128] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #341]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #132] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #340]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #136] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #339]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #140] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #338]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #144] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #337]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #148] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #336]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #152] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp q2, q0, [sp, #320]
; NONEON-NOSVE-NEXT:    stp q3, q2, [x8]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x8, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #448
; NONEON-NOSVE-NEXT:    ret
  %a = load <64 x i16>, ptr %in
  %b = trunc <64 x i16> %a to <64 x i8>
  %c = add <64 x i8> %b, %b
  store <64 x i8> %c, ptr %out
  ret void
}

; NOTE: Extra 'add' is to prevent the truncate being combined with the store. define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v128i16_v128i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #192] ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: ldp q4, q5, [x0] ; CHECK-NEXT: ldp q6, q7, [x0, #64] ; CHECK-NEXT: uzp1 z17.b, z3.b, z3.b ; CHECK-NEXT: ldp q3, q18, [x0, #224] ; CHECK-NEXT: uzp1 z16.b, z2.b, z2.b ; CHECK-NEXT: ldp q2, q19, [x0, #128] ; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: uzp1 z21.b, z18.b, z18.b ; CHECK-NEXT: ldp q18, q22, [x0, #160] ; CHECK-NEXT: uzp1 z20.b, z3.b, z3.b ; CHECK-NEXT: uzp1 z24.b, z19.b, z19.b ; CHECK-NEXT: ldp q3, q19, [x0, #96] ; CHECK-NEXT: uzp1 z23.b, z2.b, z2.b ; CHECK-NEXT: uzp1 z26.b, z22.b, z22.b ; CHECK-NEXT: splice z2.b, p0, { z16.b, z17.b } ; CHECK-NEXT: uzp1 z17.b, z7.b, z7.b ; CHECK-NEXT: uzp1 z25.b, z18.b, z18.b ; CHECK-NEXT: splice z7.b, p0, { z20.b, z21.b } ; CHECK-NEXT: uzp1 z21.b, z5.b, z5.b ; CHECK-NEXT: uzp1 z19.b, z19.b, z19.b ; CHECK-NEXT: uzp1 z20.b, z4.b, z4.b ; CHECK-NEXT: uzp1 z5.b, z1.b, z1.b ; CHECK-NEXT: uzp1 z16.b, z6.b, z6.b ; CHECK-NEXT: splice z6.b, p0, { z23.b, z24.b } ; CHECK-NEXT: uzp1 z18.b, z3.b, z3.b ; CHECK-NEXT: splice z3.b, p0, { z25.b, z26.b } ; CHECK-NEXT: uzp1 z4.b, z0.b, z0.b ; CHECK-NEXT: add z0.b, z2.b, z2.b ; CHECK-NEXT: add z7.b, z7.b, z7.b ; CHECK-NEXT: splice z1.b, p0, { z16.b, z17.b } ; CHECK-NEXT: splice z2.b, p0, { z18.b, z19.b } ; CHECK-NEXT: splice z16.b, p0, { z20.b, z21.b } ; CHECK-NEXT: splice z4.b, p0, { z4.b, z5.b } ; CHECK-NEXT: add z6.b, z6.b, z6.b ; CHECK-NEXT: add z3.b, z3.b, z3.b ; CHECK-NEXT: stp q0, q7, [x1, #96] ; CHECK-NEXT: add z0.b, z1.b, z1.b ; CHECK-NEXT: add z1.b, z2.b, z2.b ; CHECK-NEXT: add z2.b, z16.b, z16.b ; CHECK-NEXT: stp q6, q3, [x1, #64] ; CHECK-NEXT: add z3.b, z4.b, z4.b ; CHECK-NEXT: stp q0, q1, [x1, #32] ; 
CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: sub sp, sp, #800 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] ; NONEON-NOSVE-NEXT: str x1, [sp, #408] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96] ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192] ; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160] ; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128] ; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224] ; NONEON-NOSVE-NEXT: str q0, [sp, #592] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #606] ; NONEON-NOSVE-NEXT: str q19, [sp, #496] ; NONEON-NOSVE-NEXT: ldrh w10, [sp, #600] ; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #512] ; NONEON-NOSVE-NEXT: ldrh w11, [sp, #598] ; NONEON-NOSVE-NEXT: ldrh w12, [sp, #596] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #432] ; NONEON-NOSVE-NEXT: ldrh w13, [sp, #594] ; NONEON-NOSVE-NEXT: str w8, [sp, #64] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #432] ; NONEON-NOSVE-NEXT: ldrh w14, [sp, #592] ; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #464] ; NONEON-NOSVE-NEXT: ldr w30, [sp, #64] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: str w8, [sp, #404] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #434] ; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #560] ; NONEON-NOSVE-NEXT: str w8, [sp, #400] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #436] ; NONEON-NOSVE-NEXT: str q5, [sp, #544] ; NONEON-NOSVE-NEXT: str w8, [sp, #396] // 
4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #438] ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #608] ; NONEON-NOSVE-NEXT: str w8, [sp, #392] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #440] ; NONEON-NOSVE-NEXT: ldrh w15, [sp, #638] ; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #640] ; NONEON-NOSVE-NEXT: ldrh w16, [sp, #636] ; NONEON-NOSVE-NEXT: ldrh w17, [sp, #634] ; NONEON-NOSVE-NEXT: str w8, [sp, #388] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #442] ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #666] ; NONEON-NOSVE-NEXT: str q3, [sp, #416] ; NONEON-NOSVE-NEXT: ldrh w18, [sp, #632] ; NONEON-NOSVE-NEXT: ldrh w0, [sp, #630] ; NONEON-NOSVE-NEXT: str w8, [sp, #384] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #444] ; NONEON-NOSVE-NEXT: ldrh w1, [sp, #628] ; NONEON-NOSVE-NEXT: ldrh w2, [sp, #626] ; NONEON-NOSVE-NEXT: ldrh w3, [sp, #624] ; NONEON-NOSVE-NEXT: ldrh w4, [sp, #622] ; NONEON-NOSVE-NEXT: str w8, [sp, #380] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #446] ; NONEON-NOSVE-NEXT: ldrh w5, [sp, #620] ; NONEON-NOSVE-NEXT: ldrh w6, [sp, #618] ; NONEON-NOSVE-NEXT: ldrh w7, [sp, #616] ; NONEON-NOSVE-NEXT: ldrh w19, [sp, #614] ; NONEON-NOSVE-NEXT: str w8, [sp, #376] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #480] ; NONEON-NOSVE-NEXT: ldrh w20, [sp, #612] ; NONEON-NOSVE-NEXT: ldrh w21, [sp, #610] ; NONEON-NOSVE-NEXT: ldrh w22, [sp, #608] ; NONEON-NOSVE-NEXT: ldrh w23, [sp, #430] ; NONEON-NOSVE-NEXT: str w8, [sp, #372] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #482] ; NONEON-NOSVE-NEXT: ldrh w24, [sp, #428] ; NONEON-NOSVE-NEXT: ldrh w25, [sp, #426] ; NONEON-NOSVE-NEXT: ldrh w26, [sp, #424] ; NONEON-NOSVE-NEXT: ldrh w27, [sp, #422] ; NONEON-NOSVE-NEXT: str w8, [sp, #368] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #484] ; NONEON-NOSVE-NEXT: ldrh w28, [sp, #420] ; NONEON-NOSVE-NEXT: ldrh w29, [sp, #418] ; NONEON-NOSVE-NEXT: strb w30, [sp, #767] ; NONEON-NOSVE-NEXT: str w8, [sp, #364] 
// 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #486] ; NONEON-NOSVE-NEXT: str w8, [sp, #360] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #488] ; NONEON-NOSVE-NEXT: str w8, [sp, #356] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #490] ; NONEON-NOSVE-NEXT: str w8, [sp, #352] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #492] ; NONEON-NOSVE-NEXT: str w8, [sp, #348] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #494] ; NONEON-NOSVE-NEXT: str w8, [sp, #344] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #448] ; NONEON-NOSVE-NEXT: str w8, [sp, #340] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #450] ; NONEON-NOSVE-NEXT: str w8, [sp, #336] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #452] ; NONEON-NOSVE-NEXT: str w8, [sp, #332] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #454] ; NONEON-NOSVE-NEXT: str w8, [sp, #328] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #456] ; NONEON-NOSVE-NEXT: str w8, [sp, #324] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #458] ; NONEON-NOSVE-NEXT: str w8, [sp, #320] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #460] ; NONEON-NOSVE-NEXT: str w8, [sp, #316] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #462] ; NONEON-NOSVE-NEXT: str w8, [sp, #312] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #464] ; NONEON-NOSVE-NEXT: str w8, [sp, #308] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #466] ; NONEON-NOSVE-NEXT: str w8, [sp, #304] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #468] ; NONEON-NOSVE-NEXT: str w8, [sp, #300] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #470] ; NONEON-NOSVE-NEXT: str w8, [sp, #296] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #472] ; NONEON-NOSVE-NEXT: str w8, [sp, #292] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #474] ; NONEON-NOSVE-NEXT: str w8, [sp, #288] // 4-byte Folded Spill 
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #476] ; NONEON-NOSVE-NEXT: str w8, [sp, #284] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #478] ; NONEON-NOSVE-NEXT: str w8, [sp, #280] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #656] ; NONEON-NOSVE-NEXT: str w8, [sp, #276] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #658] ; NONEON-NOSVE-NEXT: str w8, [sp, #272] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #660] ; NONEON-NOSVE-NEXT: str w8, [sp, #268] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #662] ; NONEON-NOSVE-NEXT: str w8, [sp, #264] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #664] ; NONEON-NOSVE-NEXT: str w8, [sp, #260] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #668] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #252] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #670] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #528] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #244] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #530] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #532] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #236] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #534] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #536] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #228] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #538] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #540] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #220] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #542] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #496] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #212] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #498] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #500] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #204] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #502] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #504] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #506] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #508] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, 
#188] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #510] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #512] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #180] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #514] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #516] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #172] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #518] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #520] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #164] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #522] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #524] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #156] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #526] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #640] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #148] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #642] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #644] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #140] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #646] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #648] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #132] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #650] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #652] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #124] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #654] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #576] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #116] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #578] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #580] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #108] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #582] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #584] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #586] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #588] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #92] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #590] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #544] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #84] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, 
#546] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #548] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #76] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #550] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #552] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #68] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #554] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #556] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #558] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #560] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #562] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #564] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #566] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #568] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #570] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #572] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #574] ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #416] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #602] ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #604] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: strb w8, [sp, #765] ; NONEON-NOSVE-NEXT: add w8, w10, w10 ; NONEON-NOSVE-NEXT: strb w8, [sp, #764] ; NONEON-NOSVE-NEXT: add w8, w11, w11 ; NONEON-NOSVE-NEXT: strb w8, [sp, #763] ; NONEON-NOSVE-NEXT: add w8, w12, w12 ; NONEON-NOSVE-NEXT: strb w8, [sp, #762] ; NONEON-NOSVE-NEXT: add w8, w13, w13 ; NONEON-NOSVE-NEXT: strb w8, [sp, #761] ; NONEON-NOSVE-NEXT: add w8, w14, w14 ; NONEON-NOSVE-NEXT: strb w8, [sp, #760] ; NONEON-NOSVE-NEXT: add w8, w15, w15 ; NONEON-NOSVE-NEXT: strb w8, [sp, #759] ; NONEON-NOSVE-NEXT: add w8, w16, w16 ; NONEON-NOSVE-NEXT: strb w8, [sp, #758] ; NONEON-NOSVE-NEXT: add w8, w17, w17 ; NONEON-NOSVE-NEXT: strb w8, [sp, #757] ; NONEON-NOSVE-NEXT: 
add w8, w18, w18 ; NONEON-NOSVE-NEXT: strb w8, [sp, #756] ; NONEON-NOSVE-NEXT: add w8, w0, w0 ; NONEON-NOSVE-NEXT: strb w8, [sp, #755] ; NONEON-NOSVE-NEXT: add w8, w1, w1 ; NONEON-NOSVE-NEXT: strb w8, [sp, #754] ; NONEON-NOSVE-NEXT: add w8, w2, w2 ; NONEON-NOSVE-NEXT: strb w8, [sp, #753] ; NONEON-NOSVE-NEXT: add w8, w3, w3 ; NONEON-NOSVE-NEXT: strb w8, [sp, #752] ; NONEON-NOSVE-NEXT: add w8, w4, w4 ; NONEON-NOSVE-NEXT: strb w8, [sp, #751] ; NONEON-NOSVE-NEXT: add w8, w5, w5 ; NONEON-NOSVE-NEXT: strb w8, [sp, #750] ; NONEON-NOSVE-NEXT: add w8, w6, w6 ; NONEON-NOSVE-NEXT: strb w8, [sp, #749] ; NONEON-NOSVE-NEXT: add w8, w7, w7 ; NONEON-NOSVE-NEXT: strb w8, [sp, #748] ; NONEON-NOSVE-NEXT: add w8, w19, w19 ; NONEON-NOSVE-NEXT: strb w8, [sp, #747] ; NONEON-NOSVE-NEXT: add w8, w20, w20 ; NONEON-NOSVE-NEXT: strb w8, [sp, #746] ; NONEON-NOSVE-NEXT: add w8, w21, w21 ; NONEON-NOSVE-NEXT: strb w8, [sp, #745] ; NONEON-NOSVE-NEXT: add w8, w22, w22 ; NONEON-NOSVE-NEXT: strb w8, [sp, #744] ; NONEON-NOSVE-NEXT: add w8, w23, w23 ; NONEON-NOSVE-NEXT: strb w8, [sp, #743] ; NONEON-NOSVE-NEXT: add w8, w24, w24 ; NONEON-NOSVE-NEXT: strb w8, [sp, #742] ; NONEON-NOSVE-NEXT: add w8, w25, w25 ; NONEON-NOSVE-NEXT: strb w8, [sp, #741] ; NONEON-NOSVE-NEXT: add w8, w26, w26 ; NONEON-NOSVE-NEXT: strb w8, [sp, #740] ; NONEON-NOSVE-NEXT: add w8, w27, w27 ; NONEON-NOSVE-NEXT: strb w8, [sp, #739] ; NONEON-NOSVE-NEXT: add w8, w28, w28 ; NONEON-NOSVE-NEXT: strb w8, [sp, #738] ; NONEON-NOSVE-NEXT: add w8, w29, w29 ; NONEON-NOSVE-NEXT: strb w8, [sp, #737] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #766] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #736] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #736] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #735] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, 
w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #734] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #733] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #732] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #731] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #730] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #729] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #728] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #727] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #726] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #725] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #724] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #723] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #722] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #721] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, 
[sp, #720] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #783] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #782] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #781] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #780] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #779] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #778] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #777] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #776] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #775] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #774] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #773] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #772] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #771] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #770] ; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #769] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #768] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #719] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #156] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #718] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #717] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #164] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #716] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #168] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #715] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #172] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #714] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #713] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #180] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #712] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #711] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #188] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #710] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #709] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #196] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #708] ; NONEON-NOSVE-NEXT: ldr 
w8, [sp, #200] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #707] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #204] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #706] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #208] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #705] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #212] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #704] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #704] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #799] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #220] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #798] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #224] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #797] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #228] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #796] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #232] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #795] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #236] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #794] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #240] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #793] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #244] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #792] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #248] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #791] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #252] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #790] ; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #256] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #789] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #788] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #264] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #787] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #786] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #272] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #785] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #276] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #784] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #280] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #768] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #687] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #284] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #686] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #288] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #685] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #292] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #684] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #296] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #683] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #300] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #682] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #304] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #681] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: 
strb w8, [sp, #680] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #312] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #679] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #316] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #678] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #320] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #677] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #324] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #676] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #328] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #675] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #674] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #336] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #673] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #340] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #672] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #344] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #703] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #348] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #702] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #352] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #701] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #356] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #700] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #360] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #699] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #364] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #698] ; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #368] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #697] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #696] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #376] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #695] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #380] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #694] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #384] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #693] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #692] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #392] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #691] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #690] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #400] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #689] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #688] ; NONEON-NOSVE-NEXT: ldr x8, [sp, #408] // 8-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #672] ; NONEON-NOSVE-NEXT: stp q1, q0, [x8] ; NONEON-NOSVE-NEXT: stp q4, q3, [x8, #32] ; NONEON-NOSVE-NEXT: stp q7, q6, [x8, #64] ; NONEON-NOSVE-NEXT: stp q2, q5, [x8, #96] ; NONEON-NOSVE-NEXT: add sp, sp, #800 ; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded 
Reload ; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ret %a = load <128 x i16>, ptr %in %b = trunc <128 x i16> %a to <128 x i8> %c = add <128 x i8> %b, %b store <128 x i8> %c, ptr %out ret void } ; ; truncate i32 -> i8 ; ; Loads <8 x i32> from %in, truncates each lane to i8 and returns the <8 x i8> result. define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind { ; CHECK-LABEL: trunc_v8i32_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h ; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h } ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] ; NONEON-NOSVE-NEXT: strb w9, [sp, #47] ; NONEON-NOSVE-NEXT: strb w8, [sp, #46] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] ; NONEON-NOSVE-NEXT: strb w9, [sp, #45] ; NONEON-NOSVE-NEXT: strb w8, [sp, #44] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] ; NONEON-NOSVE-NEXT: strb w9, [sp, #43] ; NONEON-NOSVE-NEXT: strb w8, [sp, #42] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] ; NONEON-NOSVE-NEXT: strb w9, [sp, #41] ; NONEON-NOSVE-NEXT: strb w8, [sp, #40] ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] ; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = trunc <8 x i32> %a to <8 x i8> ret <8 x i8> %b } ; Loads <16 x i32> from %in, truncates each lane to i8 and returns the <16 x i8> result. define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind { ; CHECK-LABEL: trunc_v16i32_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h ; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h ; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } ; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; 
CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b ; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b ; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #80 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] ; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] ; NONEON-NOSVE-NEXT: str q1, [sp, #48] ; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] ; NONEON-NOSVE-NEXT: str q2, [sp] ; NONEON-NOSVE-NEXT: strb w9, [sp, #79] ; NONEON-NOSVE-NEXT: strb w8, [sp, #78] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] ; NONEON-NOSVE-NEXT: strb w9, [sp, #77] ; NONEON-NOSVE-NEXT: strb w8, [sp, #76] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] ; NONEON-NOSVE-NEXT: strb w9, [sp, #75] ; NONEON-NOSVE-NEXT: strb w8, [sp, #74] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] ; NONEON-NOSVE-NEXT: strb w9, [sp, #73] ; NONEON-NOSVE-NEXT: strb w8, [sp, #72] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] ; NONEON-NOSVE-NEXT: strb w9, [sp, #71] ; NONEON-NOSVE-NEXT: strb w8, [sp, #70] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] ; NONEON-NOSVE-NEXT: strb w9, [sp, #69] ; NONEON-NOSVE-NEXT: strb w8, [sp, #68] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] ; NONEON-NOSVE-NEXT: strb w9, [sp, #67] ; NONEON-NOSVE-NEXT: strb w8, [sp, #66] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp] ; NONEON-NOSVE-NEXT: strb w9, [sp, #65] ; NONEON-NOSVE-NEXT: strb w8, [sp, #64] ; NONEON-NOSVE-NEXT: ldr q0, [sp, #64] ; NONEON-NOSVE-NEXT: add sp, sp, #80 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i32>, ptr %in %b = trunc <16 x i32> %a to <16 x i8> ret <16 x i8> %b } ; NOTE: Extra 'add' is to prevent the truncate being combined with the store. 
; Loads <32 x i32> from %in, truncates each lane to i8, adds the result to itself and stores the <32 x i8> to %out.
define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v32i32_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0, #96] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: ldp q2, q3, [x0, #32] ; CHECK-NEXT: ldp q4, q5, [x0, #64] ; CHECK-NEXT: ldp q6, q7, [x0] ; CHECK-NEXT: uzp1 z17.h, z1.h, z1.h ; CHECK-NEXT: uzp1 z16.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h ; CHECK-NEXT: uzp1 z19.h, z5.h, z5.h ; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h ; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h ; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h ; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h } ; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h } ; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h } ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b ; CHECK-NEXT: uzp1 z7.b, z0.b, z0.b ; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b ; CHECK-NEXT: uzp1 z6.b, z1.b, z1.b ; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b } ; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b } ; CHECK-NEXT: add z0.b, z0.b, z0.b ; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v32i32_v32i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #272 ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] ; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #192] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #208] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #224] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #240] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96] ; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #80] ; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #112] ; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #16] ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88] ; NONEON-NOSVE-NEXT: ldp w27, w28, [sp, #112] ; NONEON-NOSVE-NEXT: ldp w25, w26, [sp, #104] ; NONEON-NOSVE-NEXT: add w6, 
w8, w8 ; NONEON-NOSVE-NEXT: add w5, w9, w9 ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #256] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w10, w8, [sp, #128] ; NONEON-NOSVE-NEXT: ldp w23, w24, [sp, #96] ; NONEON-NOSVE-NEXT: ldp w21, w22, [sp, #24] ; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #120] ; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #48] ; NONEON-NOSVE-NEXT: ldp w19, w20, [sp, #16] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: strb w8, [sp, #155] ; NONEON-NOSVE-NEXT: add w8, w28, w28 ; NONEON-NOSVE-NEXT: strb w9, [sp, #154] ; NONEON-NOSVE-NEXT: add w9, w27, w27 ; NONEON-NOSVE-NEXT: strb w8, [sp, #153] ; NONEON-NOSVE-NEXT: add w8, w26, w26 ; NONEON-NOSVE-NEXT: strb w9, [sp, #152] ; NONEON-NOSVE-NEXT: add w9, w25, w25 ; NONEON-NOSVE-NEXT: ldp w4, w7, [sp, #56] ; NONEON-NOSVE-NEXT: strb w8, [sp, #151] ; NONEON-NOSVE-NEXT: add w8, w24, w24 ; NONEON-NOSVE-NEXT: strb w9, [sp, #150] ; NONEON-NOSVE-NEXT: add w9, w23, w23 ; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #48] ; NONEON-NOSVE-NEXT: strb w8, [sp, #149] ; NONEON-NOSVE-NEXT: add w8, w22, w22 ; NONEON-NOSVE-NEXT: strb w9, [sp, #148] ; NONEON-NOSVE-NEXT: add w9, w21, w21 ; NONEON-NOSVE-NEXT: ldp w18, w0, [sp, #40] ; NONEON-NOSVE-NEXT: strb w8, [sp, #147] ; NONEON-NOSVE-NEXT: add w8, w20, w20 ; NONEON-NOSVE-NEXT: strb w9, [sp, #146] ; NONEON-NOSVE-NEXT: add w9, w19, w19 ; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #32] ; NONEON-NOSVE-NEXT: strb w8, [sp, #145] ; NONEON-NOSVE-NEXT: add w8, w7, w7 ; NONEON-NOSVE-NEXT: strb w9, [sp, #144] ; NONEON-NOSVE-NEXT: add w9, w4, w4 ; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #72] ; NONEON-NOSVE-NEXT: strb w8, [sp, #175] ; NONEON-NOSVE-NEXT: add w8, w3, w3 ; NONEON-NOSVE-NEXT: strb w9, [sp, #174] ; NONEON-NOSVE-NEXT: add w9, w2, w2 ; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #64] ; NONEON-NOSVE-NEXT: strb w8, [sp, #173] ; 
NONEON-NOSVE-NEXT: add w8, w0, w0 ; NONEON-NOSVE-NEXT: strb w9, [sp, #172] ; NONEON-NOSVE-NEXT: add w9, w18, w18 ; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #136] ; NONEON-NOSVE-NEXT: strb w8, [sp, #171] ; NONEON-NOSVE-NEXT: add w8, w17, w17 ; NONEON-NOSVE-NEXT: strb w9, [sp, #170] ; NONEON-NOSVE-NEXT: add w9, w16, w16 ; NONEON-NOSVE-NEXT: strb w8, [sp, #169] ; NONEON-NOSVE-NEXT: add w8, w15, w15 ; NONEON-NOSVE-NEXT: strb w9, [sp, #168] ; NONEON-NOSVE-NEXT: add w9, w14, w14 ; NONEON-NOSVE-NEXT: strb w8, [sp, #167] ; NONEON-NOSVE-NEXT: add w8, w13, w13 ; NONEON-NOSVE-NEXT: strb w9, [sp, #166] ; NONEON-NOSVE-NEXT: add w9, w12, w12 ; NONEON-NOSVE-NEXT: ldp w29, w30, [sp, #80] ; NONEON-NOSVE-NEXT: strb w8, [sp, #165] ; NONEON-NOSVE-NEXT: add w8, w11, w11 ; NONEON-NOSVE-NEXT: strb w9, [sp, #164] ; NONEON-NOSVE-NEXT: add w9, w10, w10 ; NONEON-NOSVE-NEXT: strb w8, [sp, #163] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #162] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w5, [sp, #159] ; NONEON-NOSVE-NEXT: add w5, w30, w30 ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w6, [sp, #158] ; NONEON-NOSVE-NEXT: add w6, w29, w29 ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: strb w5, [sp, #157] ; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #256] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w6, [sp, #156] ; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #240] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w8, [sp, #161] ; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #224] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strb w9, [sp, #160] ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #208] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #144] ; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #192] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] ; NONEON-NOSVE-NEXT: add sp, sp, #272 ; 
NONEON-NOSVE-NEXT: ret %a = load <32 x i32>, ptr %in %b = trunc <32 x i32> %a to <32 x i8> %c = add <32 x i8> %b, %b store <32 x i8> %c, ptr %out ret void } ; NOTE: Extra 'add' is to prevent the truncate being combined with the store. define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v64i32_v64i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #160] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: ldp q4, q5, [x0, #96] ; CHECK-NEXT: ldp q6, q7, [x0] ; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h ; CHECK-NEXT: ldp q3, q18, [x0, #128] ; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h ; CHECK-NEXT: ldp q2, q19, [x0, #192] ; CHECK-NEXT: ldp q0, q1, [x0, #64] ; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h ; CHECK-NEXT: ldp q18, q22, [x0, #224] ; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h ; CHECK-NEXT: ldp q3, q23, [x0, #32] ; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h } ; CHECK-NEXT: uzp1 z27.h, z19.h, z19.h ; CHECK-NEXT: uzp1 z25.h, z22.h, z22.h ; CHECK-NEXT: uzp1 z26.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z24.h, z18.h, z18.h ; CHECK-NEXT: uzp1 z18.h, z23.h, z23.h ; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h ; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h ; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h ; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h ; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h ; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h ; CHECK-NEXT: splice z1.h, p0, { z20.h, z21.h } ; CHECK-NEXT: splice z6.h, p0, { z24.h, z25.h } ; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h ; CHECK-NEXT: splice z0.h, p0, { z26.h, z27.h } ; CHECK-NEXT: splice z7.h, p0, { z17.h, z18.h } ; CHECK-NEXT: uzp1 z17.b, z16.b, z16.b ; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h } ; CHECK-NEXT: splice z3.h, p0, { z22.h, z23.h } ; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h } ; CHECK-NEXT: uzp1 z16.b, z1.b, z1.b ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b ; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b ; CHECK-NEXT: uzp1 z1.b, z7.b, z7.b ; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b ; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b ; CHECK-NEXT: splice z7.b, p0, { z16.b, z17.b } ; CHECK-NEXT: 
uzp1 z2.b, z4.b, z4.b ; CHECK-NEXT: splice z4.b, p0, { z5.b, z6.b } ; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b } ; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b } ; CHECK-NEXT: add z2.b, z7.b, z7.b ; CHECK-NEXT: add z3.b, z4.b, z4.b ; CHECK-NEXT: add z0.b, z0.b, z0.b ; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: sub sp, sp, #480 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] ; NONEON-NOSVE-NEXT: str x1, [sp, #152] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128] ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32] ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] ; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224] ; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192] ; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #288] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #316] ; NONEON-NOSVE-NEXT: str q18, [sp, #208] ; NONEON-NOSVE-NEXT: ldr w10, [sp, #304] ; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #176] ; NONEON-NOSVE-NEXT: ldr w11, [sp, #296] ; NONEON-NOSVE-NEXT: ldr w12, [sp, #292] ; NONEON-NOSVE-NEXT: add w20, w8, w8 ; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #224] ; NONEON-NOSVE-NEXT: ldr w13, [sp, #288] ; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #256] ; NONEON-NOSVE-NEXT: ldr w22, [sp, #312] ; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #384] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #400] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #404] ; NONEON-NOSVE-NEXT: str q7, [sp, #160] ; 
NONEON-NOSVE-NEXT: stp q2, q4, [sp, #320] ; NONEON-NOSVE-NEXT: ldr w18, [sp, #396] ; NONEON-NOSVE-NEXT: ldr w0, [sp, #392] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #408] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #412] ; NONEON-NOSVE-NEXT: ldr w14, [sp, #332] ; NONEON-NOSVE-NEXT: ldr w15, [sp, #328] ; NONEON-NOSVE-NEXT: ldr w16, [sp, #324] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #272] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #276] ; NONEON-NOSVE-NEXT: ldr w17, [sp, #320] ; NONEON-NOSVE-NEXT: ldr w1, [sp, #388] ; NONEON-NOSVE-NEXT: ldr w2, [sp, #384] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #280] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #284] ; NONEON-NOSVE-NEXT: ldr w3, [sp, #348] ; NONEON-NOSVE-NEXT: ldr w4, [sp, #344] ; NONEON-NOSVE-NEXT: ldr w5, [sp, #340] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w6, [sp, #336] ; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #352] ; NONEON-NOSVE-NEXT: ldr w7, [sp, #380] ; NONEON-NOSVE-NEXT: ldr w19, [sp, #376] ; NONEON-NOSVE-NEXT: ldr w21, [sp, #372] ; NONEON-NOSVE-NEXT: ldr w23, [sp, #368] ; NONEON-NOSVE-NEXT: ldr w24, [sp, #364] ; NONEON-NOSVE-NEXT: ldr w25, [sp, #360] ; NONEON-NOSVE-NEXT: ldr w26, [sp, #356] ; NONEON-NOSVE-NEXT: ldr w27, [sp, #352] ; NONEON-NOSVE-NEXT: strb w20, [sp, #463] ; NONEON-NOSVE-NEXT: add w20, w22, w22 ; NONEON-NOSVE-NEXT: strb w20, [sp, #462] ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #240] ; NONEON-NOSVE-NEXT: ldp w29, w28, [sp, #168] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #248] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #256] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #264] ; 
NONEON-NOSVE-NEXT: ldr w8, [sp, #268] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #176] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #184] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #224] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #232] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #192] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #200] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #208] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #216] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #300] ; NONEON-NOSVE-NEXT: ldp w8, w30, [sp, #160] ; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w8, [sp, #308] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #461] ; NONEON-NOSVE-NEXT: add w8, w10, w10 ; NONEON-NOSVE-NEXT: strb w8, [sp, #460] ; NONEON-NOSVE-NEXT: add w8, w9, w9 ; NONEON-NOSVE-NEXT: strb w8, [sp, #459] ; NONEON-NOSVE-NEXT: add w8, w11, w11 ; NONEON-NOSVE-NEXT: strb w8, [sp, #458] ; NONEON-NOSVE-NEXT: add w8, w12, w12 ; NONEON-NOSVE-NEXT: strb w8, [sp, #457] ; NONEON-NOSVE-NEXT: add w8, w13, w13 ; NONEON-NOSVE-NEXT: strb w8, [sp, #456] ; NONEON-NOSVE-NEXT: add w8, w14, w14 ; NONEON-NOSVE-NEXT: strb w8, [sp, #455] ; NONEON-NOSVE-NEXT: add w8, w15, w15 ; NONEON-NOSVE-NEXT: strb w8, [sp, #454] ; NONEON-NOSVE-NEXT: add w8, w16, w16 ; NONEON-NOSVE-NEXT: strb w8, [sp, #453] ; NONEON-NOSVE-NEXT: add w8, w17, w17 ; NONEON-NOSVE-NEXT: strb w8, [sp, #452] ; NONEON-NOSVE-NEXT: add w8, w18, w18 ; 
NONEON-NOSVE-NEXT: strb w8, [sp, #451] ; NONEON-NOSVE-NEXT: add w8, w0, w0 ; NONEON-NOSVE-NEXT: strb w8, [sp, #450] ; NONEON-NOSVE-NEXT: add w8, w1, w1 ; NONEON-NOSVE-NEXT: strb w8, [sp, #449] ; NONEON-NOSVE-NEXT: add w8, w2, w2 ; NONEON-NOSVE-NEXT: strb w8, [sp, #448] ; NONEON-NOSVE-NEXT: add w8, w3, w3 ; NONEON-NOSVE-NEXT: strb w8, [sp, #447] ; NONEON-NOSVE-NEXT: add w8, w4, w4 ; NONEON-NOSVE-NEXT: strb w8, [sp, #446] ; NONEON-NOSVE-NEXT: add w8, w5, w5 ; NONEON-NOSVE-NEXT: strb w8, [sp, #445] ; NONEON-NOSVE-NEXT: add w8, w6, w6 ; NONEON-NOSVE-NEXT: strb w8, [sp, #444] ; NONEON-NOSVE-NEXT: add w8, w7, w7 ; NONEON-NOSVE-NEXT: strb w8, [sp, #443] ; NONEON-NOSVE-NEXT: add w8, w19, w19 ; NONEON-NOSVE-NEXT: strb w8, [sp, #442] ; NONEON-NOSVE-NEXT: add w8, w21, w21 ; NONEON-NOSVE-NEXT: strb w8, [sp, #441] ; NONEON-NOSVE-NEXT: add w8, w23, w23 ; NONEON-NOSVE-NEXT: strb w8, [sp, #440] ; NONEON-NOSVE-NEXT: add w8, w24, w24 ; NONEON-NOSVE-NEXT: strb w8, [sp, #439] ; NONEON-NOSVE-NEXT: add w8, w25, w25 ; NONEON-NOSVE-NEXT: strb w8, [sp, #438] ; NONEON-NOSVE-NEXT: add w8, w26, w26 ; NONEON-NOSVE-NEXT: strb w8, [sp, #437] ; NONEON-NOSVE-NEXT: add w8, w27, w27 ; NONEON-NOSVE-NEXT: strb w8, [sp, #436] ; NONEON-NOSVE-NEXT: add w8, w28, w28 ; NONEON-NOSVE-NEXT: strb w8, [sp, #435] ; NONEON-NOSVE-NEXT: add w8, w29, w29 ; NONEON-NOSVE-NEXT: strb w8, [sp, #434] ; NONEON-NOSVE-NEXT: add w8, w30, w30 ; NONEON-NOSVE-NEXT: strb w8, [sp, #433] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #432] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #431] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #430] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, 
#429] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #36] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #428] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #427] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #426] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #425] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #424] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #423] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #422] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #421] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #420] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #419] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #418] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #417] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #416] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #416] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, 
[sp, #479] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #478] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #477] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #476] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #475] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #474] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #473] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #472] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #471] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #470] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #469] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #468] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #467] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #466] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strb w8, [sp, #465] ; 
; NONEON-NOSVE-NEXT: ldr w8, [sp, #148] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w8, [sp, #464]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #152] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #448]
; NONEON-NOSVE-NEXT: stp q3, q2, [x8]
; NONEON-NOSVE-NEXT: stp q0, q1, [x8, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #480
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ret
  %a = load <64 x i32>, ptr %in
  %b = trunc <64 x i32> %a to <64 x i8>
  %c = add <64 x i8> %b, %b
  store <64 x i8> %c, ptr %out
  ret void
}

;
; truncate i32 -> i16
;

; Loads <8 x i32> from %in and returns it truncated to <8 x i16>. The result is
; returned rather than stored, so there is no store in this function for the
; truncate to be combined with and no extra 'add' is used.
; The first assertion block is shared by the two RUN lines that use the default
; prefix (+sve2 streaming-compatible and +sme streaming); the second block
; belongs to the RUN line that passes no -mattr.
define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h
; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT: strh w9, [sp, #46]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT: strh w9, [sp, #42]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT: strh w9, [sp, #38]
; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp]
; NONEON-NOSVE-NEXT: strh w9, [sp, #34]
; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %a = load <8 x i32>, ptr %in
  %b = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %b
}

; Loads <16 x i32> from %in, truncates it to <16 x i16>, adds the truncated
; vector to itself and stores the sum to %out.
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i32_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h
; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: add z1.h, z2.h, z2.h
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp]
; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #32]
; NONEON-NOSVE-NEXT: ldp w4, w5, [sp, #8]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: ldp w18, w0, [sp]
; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #24]
; NONEON-NOSVE-NEXT: strh w8, [sp, #78]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strh w9, [sp, #76]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #74]
; NONEON-NOSVE-NEXT: add w8, w5, w5
; NONEON-NOSVE-NEXT: strh w9, [sp, #72]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #56]
; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strh w9, [sp, #68]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #48]
; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strh w9, [sp, #64]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strh w8, [sp, #94]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strh w9, [sp, #92]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strh w8, [sp, #90]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strh w9, [sp, #88]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: strh w8, [sp, #86]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strh w9, [sp, #84]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strh w8, [sp, #82]
; NONEON-NOSVE-NEXT: strh w9, [sp, #80]
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %a = load <16 x i32>, ptr %in
  %b = trunc <16 x i32> %a to <16 x i16>
  %c = add <16 x i16> %b, %b
  store <16 x i16> %c, ptr %out
  ret void
}

; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads <32 x i32> from %in, truncates it to <32 x i16>, adds the truncated
; vector to itself and stores the sum to %out.
; The first assertion block is shared by the two RUN lines that use the default
; prefix (+sve2 streaming-compatible and +sme streaming); the second block
; belongs to the RUN line that passes no -mattr.
define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #64]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q2, q3, [x0, #96]
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, { z6.h, z7.h }
; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h }
; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h }
; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: add z2.h, z2.h, z2.h
; CHECK-NEXT: add z3.h, z3.h, z3.h
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i32_v32i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #304
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #80]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #112]
; NONEON-NOSVE-NEXT: stp q5, q7, [sp, #16]
; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #88]
; NONEON-NOSVE-NEXT: ldp w27, w28, [sp, #112]
; NONEON-NOSVE-NEXT: ldp w25, w26, [sp, #104]
; NONEON-NOSVE-NEXT: add w6, w8, w8
; NONEON-NOSVE-NEXT: add w5, w9, w9
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w10, w8, [sp, #128]
; NONEON-NOSVE-NEXT: ldp w23, w24, [sp, #96]
; NONEON-NOSVE-NEXT: ldp w21, w22, [sp, #24]
; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #120]
; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #48]
; NONEON-NOSVE-NEXT: ldp w19, w20, [sp, #16]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: strh w8, [sp, #182]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: strh w9, [sp, #180]
; NONEON-NOSVE-NEXT: add w9, w27, w27
; NONEON-NOSVE-NEXT: strh w8, [sp, #178]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: strh w9, [sp, #176]
; NONEON-NOSVE-NEXT: add w9, w25, w25
; NONEON-NOSVE-NEXT: ldp w4, w7, [sp, #56]
; NONEON-NOSVE-NEXT: strh w8, [sp, #174]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: strh w9, [sp, #172]
; NONEON-NOSVE-NEXT: add w9, w23, w23
; NONEON-NOSVE-NEXT: ldp w2, w3, [sp, #48]
; NONEON-NOSVE-NEXT: strh w8, [sp, #170]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: strh w9, [sp, #168]
; NONEON-NOSVE-NEXT: add w9, w21, w21
; NONEON-NOSVE-NEXT: ldp w18, w0, [sp, #40]
; NONEON-NOSVE-NEXT: strh w8, [sp, #166]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: strh w9, [sp, #164]
; NONEON-NOSVE-NEXT: add w9, w19, w19
; NONEON-NOSVE-NEXT: ldp w16, w17, [sp, #32]
; NONEON-NOSVE-NEXT: strh w8, [sp, #162]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strh w9, [sp, #160]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldp w14, w15, [sp, #72]
; NONEON-NOSVE-NEXT: strh w8, [sp, #158]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strh w9, [sp, #156]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: ldp w12, w13, [sp, #64]
; NONEON-NOSVE-NEXT: strh w8, [sp, #154]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strh w9, [sp, #152]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: ldp w10, w11, [sp, #136]
; NONEON-NOSVE-NEXT: strh w8, [sp, #150]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strh w9, [sp, #148]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strh w8, [sp, #146]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strh w9, [sp, #144]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strh w8, [sp, #206]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strh w9, [sp, #204]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: ldp w29, w30, [sp, #80]
; NONEON-NOSVE-NEXT: strh w8, [sp, #202]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: strh w9, [sp, #200]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strh w8, [sp, #198]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w9, [sp, #196]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w5, [sp, #190]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strh w6, [sp, #188]
; NONEON-NOSVE-NEXT: add w6, w29, w29
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strh w5, [sp, #186]
; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #144]
; NONEON-NOSVE-NEXT: strh w6, [sp, #184]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w8, [sp, #194]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strh w9, [sp, #192]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #176]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q3, q2, [x1]
; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #304
; NONEON-NOSVE-NEXT: ret
  %a = load <32 x i32>, ptr %in
  %b = trunc <32 x i32> %a to <32 x i16>
  %c = add <32 x i16> %b, %b
  store <32 x i16> %c, ptr %out
  ret void
}

; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #192]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: ldp q6, q7, [x0, #64]
; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT: ldp q3, q18, [x0, #224]
; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
; CHECK-NEXT: ldp q2, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h
; CHECK-NEXT: ldp q18, q22, [x0, #160]
; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z24.h, z19.h, z19.h
; CHECK-NEXT: ldp q3, q19, [x0, #96]
; CHECK-NEXT: uzp1 z23.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z26.h, z22.h, z22.h
; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z17.h, z7.h, z7.h
; CHECK-NEXT: uzp1 z25.h, z18.h, z18.h
; CHECK-NEXT: splice z7.h, p0, { z20.h, z21.h }
; CHECK-NEXT: uzp1 z21.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
; CHECK-NEXT: uzp1 z20.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z16.h, z6.h, z6.h
; CHECK-NEXT: splice z6.h, p0, { z23.h, z24.h }
; CHECK-NEXT: uzp1 z18.h, z3.h, z3.h
; CHECK-NEXT: splice z3.h, p0, { z25.h, z26.h }
; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
; CHECK-NEXT: add z0.h, z2.h, z2.h
; CHECK-NEXT: add z7.h, z7.h, z7.h
; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h }
; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h }
; CHECK-NEXT: splice z16.h, p0, { z20.h, z21.h }
; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h }
; CHECK-NEXT: add z6.h, z6.h, z6.h
; CHECK-NEXT: add z3.h, z3.h, z3.h
; CHECK-NEXT: stp q0, q7, [x1, #96]
; CHECK-NEXT: add z0.h, z1.h, z1.h
; CHECK-NEXT: add z1.h, z2.h, z2.h
; CHECK-NEXT: add z2.h, z16.h, z16.h
; CHECK-NEXT: stp q6, q3, [x1, #64]
; CHECK-NEXT: add z3.h, z4.h, z4.h
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
;
CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: sub sp, sp, #528 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] ; NONEON-NOSVE-NEXT: mov x5, x1 ; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192] ; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224] ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96] ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160] ; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128] ; NONEON-NOSVE-NEXT: str q0, [sp, #320] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #332] ; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #160] ; NONEON-NOSVE-NEXT: ldr w10, [sp, #320] ; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #192] ; NONEON-NOSVE-NEXT: ldr w23, [sp, #328] ; NONEON-NOSVE-NEXT: add w21, w8, w8 ; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #240] ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #160] ; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #368] ; NONEON-NOSVE-NEXT: str q19, [sp, #224] ; NONEON-NOSVE-NEXT: ldr w29, [sp, #380] ; NONEON-NOSVE-NEXT: ldr w30, [sp, #376] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #168] ; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #288] ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #336] ; NONEON-NOSVE-NEXT: ldr w3, [sp, #300] ; NONEON-NOSVE-NEXT: ldr w4, [sp, #296] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w11, [sp, #360] ; NONEON-NOSVE-NEXT: ldr w12, [sp, #356] ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #208] ; NONEON-NOSVE-NEXT: ldr w13, [sp, #352] ; 
NONEON-NOSVE-NEXT: ldr w14, [sp, #348] ; NONEON-NOSVE-NEXT: ldr w15, [sp, #344] ; NONEON-NOSVE-NEXT: str q3, [sp, #144] ; NONEON-NOSVE-NEXT: ldr w16, [sp, #340] ; NONEON-NOSVE-NEXT: ldr w17, [sp, #336] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w6, [sp, #292] ; NONEON-NOSVE-NEXT: ldr w7, [sp, #288] ; NONEON-NOSVE-NEXT: str q5, [sp, #272] ; NONEON-NOSVE-NEXT: ldr w25, [sp, #316] ; NONEON-NOSVE-NEXT: ldr w26, [sp, #312] ; NONEON-NOSVE-NEXT: ldr w19, [sp, #284] ; NONEON-NOSVE-NEXT: ldr w20, [sp, #280] ; NONEON-NOSVE-NEXT: ldr w22, [sp, #276] ; NONEON-NOSVE-NEXT: ldr w24, [sp, #272] ; NONEON-NOSVE-NEXT: ldr w27, [sp, #308] ; NONEON-NOSVE-NEXT: ldr w28, [sp, #304] ; NONEON-NOSVE-NEXT: strh w21, [sp, #494] ; NONEON-NOSVE-NEXT: add w21, w23, w23 ; NONEON-NOSVE-NEXT: strh w21, [sp, #492] ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #216] ; NONEON-NOSVE-NEXT: ldp w0, w18, [sp, #152] ; NONEON-NOSVE-NEXT: ldp w2, w1, [sp, #144] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #176] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #184] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #192] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #200] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #384] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #388] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #392] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #396] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #256] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #260] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #264] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #268] 
; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #224] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #232] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #240] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #248] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w9, [sp, #368] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #372] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w8, [sp, #324] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #364] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #490] ; NONEON-NOSVE-NEXT: add w8, w10, w10 ; NONEON-NOSVE-NEXT: strh w8, [sp, #488] ; NONEON-NOSVE-NEXT: add w8, w9, w9 ; NONEON-NOSVE-NEXT: strh w8, [sp, #486] ; NONEON-NOSVE-NEXT: add w8, w11, w11 ; NONEON-NOSVE-NEXT: strh w8, [sp, #484] ; NONEON-NOSVE-NEXT: add w8, w12, w12 ; NONEON-NOSVE-NEXT: strh w8, [sp, #482] ; NONEON-NOSVE-NEXT: add w8, w13, w13 ; NONEON-NOSVE-NEXT: strh w8, [sp, #480] ; NONEON-NOSVE-NEXT: add w8, w14, w14 ; NONEON-NOSVE-NEXT: strh w8, [sp, #478] ; NONEON-NOSVE-NEXT: add w8, w15, w15 ; NONEON-NOSVE-NEXT: strh w8, [sp, #476] ; NONEON-NOSVE-NEXT: add w8, w16, w16 ; NONEON-NOSVE-NEXT: strh w8, [sp, #474] ; NONEON-NOSVE-NEXT: add w8, w17, w17 ; NONEON-NOSVE-NEXT: strh w8, [sp, #472] ; NONEON-NOSVE-NEXT: add w8, w18, w18 ; NONEON-NOSVE-NEXT: strh w8, [sp, #470] ; NONEON-NOSVE-NEXT: add w8, w0, w0 ; NONEON-NOSVE-NEXT: strh w8, [sp, #468] ; NONEON-NOSVE-NEXT: add w8, w1, w1 ; NONEON-NOSVE-NEXT: strh w8, [sp, #466] ; NONEON-NOSVE-NEXT: add w8, w2, w2 ; NONEON-NOSVE-NEXT: strh w8, [sp, #464] ; NONEON-NOSVE-NEXT: add w8, w3, w3 ; NONEON-NOSVE-NEXT: strh w8, [sp, #462] ; NONEON-NOSVE-NEXT: add w8, w4, w4 ; NONEON-NOSVE-NEXT: strh w8, [sp, #460] ; 
NONEON-NOSVE-NEXT: add w8, w6, w6 ; NONEON-NOSVE-NEXT: strh w8, [sp, #458] ; NONEON-NOSVE-NEXT: add w8, w7, w7 ; NONEON-NOSVE-NEXT: strh w8, [sp, #456] ; NONEON-NOSVE-NEXT: add w8, w19, w19 ; NONEON-NOSVE-NEXT: strh w8, [sp, #454] ; NONEON-NOSVE-NEXT: add w8, w20, w20 ; NONEON-NOSVE-NEXT: strh w8, [sp, #452] ; NONEON-NOSVE-NEXT: add w8, w22, w22 ; NONEON-NOSVE-NEXT: strh w8, [sp, #450] ; NONEON-NOSVE-NEXT: add w8, w24, w24 ; NONEON-NOSVE-NEXT: strh w8, [sp, #448] ; NONEON-NOSVE-NEXT: add w8, w25, w25 ; NONEON-NOSVE-NEXT: strh w8, [sp, #510] ; NONEON-NOSVE-NEXT: add w8, w26, w26 ; NONEON-NOSVE-NEXT: strh w8, [sp, #508] ; NONEON-NOSVE-NEXT: add w8, w27, w27 ; NONEON-NOSVE-NEXT: strh w8, [sp, #506] ; NONEON-NOSVE-NEXT: add w8, w28, w28 ; NONEON-NOSVE-NEXT: strh w8, [sp, #504] ; NONEON-NOSVE-NEXT: add w8, w29, w29 ; NONEON-NOSVE-NEXT: strh w8, [sp, #502] ; NONEON-NOSVE-NEXT: add w8, w30, w30 ; NONEON-NOSVE-NEXT: strh w8, [sp, #500] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #464] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #498] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #496] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #16] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #446] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #444] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #24] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #442] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #440] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #438] ; NONEON-NOSVE-NEXT: ldr w8, [sp, 
#36] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #436] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #434] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #44] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #432] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #432] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #526] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #52] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #524] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #56] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #522] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #60] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #520] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #518] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #68] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #516] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #514] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #76] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #512] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #496] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #414] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #84] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #412] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh 
w8, [sp, #410] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #92] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #408] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #406] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #100] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #404] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #402] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #108] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #400] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #430] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #116] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #428] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #426] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #124] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #424] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #422] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #132] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #420] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #418] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #140] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w8, [sp, #416] ; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #400] ; NONEON-NOSVE-NEXT: stp q1, q0, [x5] ; NONEON-NOSVE-NEXT: stp q4, q3, [x5, #32] ; NONEON-NOSVE-NEXT: stp q7, 
q6, [x5, #64] ; NONEON-NOSVE-NEXT: stp q2, q5, [x5, #96] ; NONEON-NOSVE-NEXT: add sp, sp, #528 ; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ret %a = load <64 x i32>, ptr %in %b = trunc <64 x i32> %a to <64 x i16> %c = add <64 x i16> %b, %b store <64 x i16> %c, ptr %out ret void }

;
; truncate i64 -> i8
;

; NOTE: v4i8 is not legal so result i8 elements are held within i16 containers.
; Loads <4 x i64> from %in and returns it truncated to <4 x i8>.
define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp]
; NONEON-NOSVE-NEXT: strh w9, [sp, #46]
; NONEON-NOSVE-NEXT: strh w10, [sp, #42]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %a = load <4 x i64>, ptr %in
  %b = trunc <4 x i64> %a to <4 x i8>
  ret <4 x i8> %b
}

; Loads <8 x i64> from %in and returns it truncated to <8 x i8>.
define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #80
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT: str q1, [sp, #48]
; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: str q2, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #48]
; NONEON-NOSVE-NEXT: strb w9, [sp, #79]
; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #32]
; NONEON-NOSVE-NEXT: strb w10, [sp, #77]
; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: strb w11, [sp, #75]
; NONEON-NOSVE-NEXT: strb w8, [sp, #73]
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #72]
; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
  %a = load <8 x i64>, ptr %in
  %b = trunc <8 x i64> %a to <8 x i8>
  ret <8 x i8> %b
}

; Loads <16 x i64> from %in and returns it truncated to <16 x i8>.
define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z19.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z1.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z18.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z0.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: splice z1.s, p0, { z18.s, z19.s }
; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #144
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #96]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q6, q7, [x0, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32]
; NONEON-NOSVE-NEXT: str q3, [sp, #80]
; NONEON-NOSVE-NEXT: str q2, [sp]
; NONEON-NOSVE-NEXT: stp q7, q5, [sp, #48]
; NONEON-NOSVE-NEXT: strb w8, [sp, #142]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #16]
; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #96]
; NONEON-NOSVE-NEXT: strb w9, [sp, #143]
; NONEON-NOSVE-NEXT: strb w8, [sp, #140]
; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #48]
; NONEON-NOSVE-NEXT: strb w10, [sp, #141]
; NONEON-NOSVE-NEXT: strb w8, [sp, #138]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #120]
; NONEON-NOSVE-NEXT: strb w11, [sp, #139]
; NONEON-NOSVE-NEXT: strb w8, [sp, #137]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #112]
; NONEON-NOSVE-NEXT: strb w8, [sp, #136]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #72]
; NONEON-NOSVE-NEXT: strb w8, [sp, #135]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #64]
; NONEON-NOSVE-NEXT: strb w8, [sp, #134]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #104]
; NONEON-NOSVE-NEXT: strb w8, [sp, #133]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #96]
; NONEON-NOSVE-NEXT: strb w8, [sp, #132]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #88]
; NONEON-NOSVE-NEXT: strb w8, [sp, #131]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #80]
; NONEON-NOSVE-NEXT: strb w8, [sp, #130]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: strb w8, [sp, #129]
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: strb w8, [sp, #128]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #128]
; NONEON-NOSVE-NEXT: add sp, sp, #144
; NONEON-NOSVE-NEXT: ret
  %a = load <16 x i64>, ptr %in
  %b = trunc <16 x i64> %a to <16 x i8>
  ret <16 x i8> %b
}

; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads <32 x i64> from %in, truncates it to <32 x i8>, adds the truncated
; vector to itself and stores the sum to %out.
define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i64_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q5, q6, [x0, #224]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q7, [x0, #64]
; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s
; CHECK-NEXT: ldp q6, q18, [x0, #192]
; CHECK-NEXT: uzp1 z16.s, z5.s, z5.s
; CHECK-NEXT: ldp q5, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s
; CHECK-NEXT: ldp q18, q22, [x0, #160]
; CHECK-NEXT: uzp1 z20.s, z6.s, z6.s
; CHECK-NEXT: ldp q6, q23, [x0, #96]
; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s }
; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s
; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s
; CHECK-NEXT: uzp1 z26.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s
; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s
; CHECK-NEXT: uzp1 z23.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s
; CHECK-NEXT: uzp1 z6.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z22.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z5.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-NEXT: splice z3.s, p0, { z20.s, z21.s }
; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
; CHECK-NEXT: splice z0.s, p0, { z24.s, z25.s }
; CHECK-NEXT: splice z7.s, p0, { z26.s, z27.s }
; CHECK-NEXT: splice z4.s, p0, { z17.s, z18.s }
; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h
; CHECK-NEXT: splice z5.s, p0, { z5.s, z6.s }
; CHECK-NEXT: splice z6.s, p0, { z22.s, z23.s }
; CHECK-NEXT: splice z1.s, p0, { z1.s, z2.s }
; CHECK-NEXT: uzp1 z16.h, z3.h, z3.h
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z19.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h
; CHECK-NEXT: uzp1 z18.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
; CHECK-NEXT: splice z0.h, p0, { z16.h, z17.h }
; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h }
; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
; CHECK-NEXT: uzp1 z7.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z6.b, z3.b, z3.b
; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b }
; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b }
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #416
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96]
; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #336] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64]
; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #352] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #368] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32]
; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #384] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128]
; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #400] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0]
; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #320] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224]
; NONEON-NOSVE-NEXT: str x1, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192]
; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #160]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #176]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #184]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #192]
; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w25, [sp, #208]
; NONEON-NOSVE-NEXT: ldr w26, [sp, #216]
; NONEON-NOSVE-NEXT: add w5, w9, w9
; NONEON-NOSVE-NEXT: add w6, w8, w8
; NONEON-NOSVE-NEXT: ldr w9, [sp, #192]
; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w2, [sp, #64]
; NONEON-NOSVE-NEXT: ldr w16, [sp, #48]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: ldr w18, [sp, #96]
; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #128]
; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #224]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #72]
; NONEON-NOSVE-NEXT: ldr w14, [sp, #128]
; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w23, [sp, #240]
; NONEON-NOSVE-NEXT: ldr w21, [sp, #224]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #272]
; NONEON-NOSVE-NEXT: ldr w27, [sp, #256]
; NONEON-NOSVE-NEXT: ldr w28, [sp, #264]
; NONEON-NOSVE-NEXT: strb w9, [sp, #298]
; NONEON-NOSVE-NEXT: ldr w24, [sp, #248]
; NONEON-NOSVE-NEXT: ldr w22, [sp, #232]
; NONEON-NOSVE-NEXT: add w9, w27, w27
; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT: ldr w8, [sp, #200]
; NONEON-NOSVE-NEXT: str q7, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #104]
; NONEON-NOSVE-NEXT: ldr w12, [sp, #112]
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: strb w9, [sp, #296]
; NONEON-NOSVE-NEXT: add w9, w25, w25
; NONEON-NOSVE-NEXT: str q18, [sp, #80]
; NONEON-NOSVE-NEXT: ldr w19, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w20, [sp, #40]
; NONEON-NOSVE-NEXT: strb w8, [sp, #299]
; NONEON-NOSVE-NEXT: add w8, w28, w28
; NONEON-NOSVE-NEXT: ldr w4, [sp, #80]
; NONEON-NOSVE-NEXT: strb w9, [sp, #294]
; NONEON-NOSVE-NEXT: add w9, w23, w23
; NONEON-NOSVE-NEXT: ldr w7, [sp, #88]
; NONEON-NOSVE-NEXT: strb w8, [sp, #297]
; NONEON-NOSVE-NEXT: add w8, w26, w26
; NONEON-NOSVE-NEXT: ldr w17, [sp, #56]
; NONEON-NOSVE-NEXT: strb w9, [sp, #292]
; NONEON-NOSVE-NEXT: add w9, w21, w21
; NONEON-NOSVE-NEXT: ldr w10, [sp, #144]
; NONEON-NOSVE-NEXT: strb w8, [sp, #295]
; NONEON-NOSVE-NEXT: add w8, w24, w24
; NONEON-NOSVE-NEXT: ldr w15, [sp, #136]
; NONEON-NOSVE-NEXT: strb w9, [sp, #290]
; NONEON-NOSVE-NEXT: add w9, w19, w19
; NONEON-NOSVE-NEXT: ldr w13, [sp, #120]
; NONEON-NOSVE-NEXT: strb w8, [sp, #293]
; NONEON-NOSVE-NEXT: add w8, w22, w22
; NONEON-NOSVE-NEXT: ldr w11, [sp, #152]
; NONEON-NOSVE-NEXT: strb w9, [sp, #288]
; NONEON-NOSVE-NEXT: add w9, w4, w4
; NONEON-NOSVE-NEXT: ldr w1, [sp, #280]
; NONEON-NOSVE-NEXT: strb w8, [sp, #291]
; NONEON-NOSVE-NEXT: add w8, w20, w20
; NONEON-NOSVE-NEXT: ldr w29, [sp, #160]
; NONEON-NOSVE-NEXT: strb w9, [sp, #318]
; NONEON-NOSVE-NEXT: add w9, w2, w2
; NONEON-NOSVE-NEXT: ldr w30, [sp, #168]
; NONEON-NOSVE-NEXT: strb w8, [sp, #289]
; NONEON-NOSVE-NEXT: add w8, w7, w7
; NONEON-NOSVE-NEXT: strb w9, [sp, #316]
; NONEON-NOSVE-NEXT: add w9, w18, w18
; NONEON-NOSVE-NEXT: strb w8, [sp, #319]
; NONEON-NOSVE-NEXT: add w8, w3, w3
; NONEON-NOSVE-NEXT: strb w9, [sp, #314]
; NONEON-NOSVE-NEXT: add w9, w16, w16
; NONEON-NOSVE-NEXT: strb w8, [sp, #317]
; NONEON-NOSVE-NEXT: add w8, w0, w0
; NONEON-NOSVE-NEXT: strb w9, [sp, #312]
; NONEON-NOSVE-NEXT: add w9, w14, w14
; NONEON-NOSVE-NEXT: strb w8, [sp, #315]
; NONEON-NOSVE-NEXT: add w8, w17, w17
; NONEON-NOSVE-NEXT: strb w9, [sp, #310]
; NONEON-NOSVE-NEXT: add w9, w12, w12
; NONEON-NOSVE-NEXT: strb w8, [sp, #313]
; NONEON-NOSVE-NEXT: add w8, w15, w15
; NONEON-NOSVE-NEXT: strb w9, [sp, #308]
; NONEON-NOSVE-NEXT: add w9, w10, w10
; NONEON-NOSVE-NEXT: strb w8, [sp, #311]
; NONEON-NOSVE-NEXT: add w8, w13, w13
; NONEON-NOSVE-NEXT: strb w9, [sp, #306]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #309]
; NONEON-NOSVE-NEXT: add w8, w11, w11
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: strb w5, [sp, #303]
; NONEON-NOSVE-NEXT: add w5, w30, w30
; NONEON-NOSVE-NEXT: strb w6, [sp, #302]
; NONEON-NOSVE-NEXT: add w6, w29, w29
; NONEON-NOSVE-NEXT: strb w8, [sp, #307]
; NONEON-NOSVE-NEXT: add w8, w1, w1
; NONEON-NOSVE-NEXT: strb w5, [sp, #301]
; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #400] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w6, [sp, #300]
; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #384] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w8, [sp, #305]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT: strb w9, [sp, #304]
; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #288]
; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #336] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: stp q1, q0, [x8]
; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #320] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT: add sp, sp, #416
; NONEON-NOSVE-NEXT: ret
  %a = load <32 x i64>, ptr %in
  %b = trunc <32 x i64> %a to <32 x i8>
  %c = add <32 x i8> %b, %b
  store <32 x i8> %c, ptr %out
  ret void
}

;
; truncate i64 -> i16
;

; Loads <4 x i64> from %in and returns it truncated to <4 x i16>.
define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp]
; NONEON-NOSVE-NEXT: strh w9, [sp, #46]
; NONEON-NOSVE-NEXT: strh w10, [sp, #42]
; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %a = load <4 x i64>, ptr %in
  %b = trunc <4 x i64> %a to <4 x i16>
  ret <4 x i16> %b
}

; Loads <8 x i64> from %in and returns it truncated to <8 x i16>.
define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #80
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT: str q1, [sp, #48]
; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT: str q2, [sp]
; NONEON-NOSVE-NEXT: strh w8, [sp, #76]
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #48]
; NONEON-NOSVE-NEXT: strh w9, [sp, #78]
; NONEON-NOSVE-NEXT: strh w8, [sp, #72]
; NONEON-NOSVE-NEXT: ldp x8, x11, [sp, #32]
; NONEON-NOSVE-NEXT: strh w10, [sp, #74]
; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT: strh w11, [sp, #70]
; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
; NONEON-NOSVE-NEXT: ldr x8, [sp]
; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT: add sp, sp, #80
; NONEON-NOSVE-NEXT: ret
  %a = load <8 x i64>, ptr %in
  %b = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %b
}

; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads <16 x i64> from %in, truncates it to <16 x i16>, adds the truncated
; vector to itself and stores the sum to %out.
define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s
; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z19.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z0.s, z2.s, z2.s
; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
; CHECK-NEXT: uzp1 z18.s, z4.s, z4.s
; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s }
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: splice z5.s, p0, { z18.s, z19.s }
; CHECK-NEXT: splice z1.s, p0, { z2.s, z3.s }
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z2.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h
; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: splice z1.h, p0, { z6.h, z7.h }
; CHECK-NEXT: add z0.h, z0.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i64_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #160
; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #64]
; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #64]
; NONEON-NOSVE-NEXT: ldr w9, [sp, #72]
; NONEON-NOSVE-NEXT: ldr w2, [sp, #96]
; NONEON-NOSVE-NEXT: ldr w3, [sp, #104]
; NONEON-NOSVE-NEXT: stp q5, q7, [sp]
; NONEON-NOSVE-NEXT: add w9, w9, w9
; NONEON-NOSVE-NEXT: add w8, w8, w8
; NONEON-NOSVE-NEXT: ldr w4, [sp, #80]
; NONEON-NOSVE-NEXT: ldr w5, [sp, #88]
; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w18, [sp]
; NONEON-NOSVE-NEXT: ldr w0, [sp, #8]
; NONEON-NOSVE-NEXT: strh w9, [sp, #142]
; NONEON-NOSVE-NEXT: add w9, w3, w3
; NONEON-NOSVE-NEXT: strh w8, [sp, #140]
; NONEON-NOSVE-NEXT: add w8, w2, w2
; NONEON-NOSVE-NEXT: ldr w16, [sp, #32]
; NONEON-NOSVE-NEXT: ldr w17, [sp, #40]
; NONEON-NOSVE-NEXT: strh w9, [sp, #138]
; NONEON-NOSVE-NEXT: add w9, w5, w5
; NONEON-NOSVE-NEXT: strh w8, [sp, #136]
; NONEON-NOSVE-NEXT: add w8, w4, w4
; NONEON-NOSVE-NEXT: ldr w14, [sp, #16]
; NONEON-NOSVE-NEXT: ldr w15, [sp, #24]
; NONEON-NOSVE-NEXT: strh w9, [sp, #134]
; NONEON-NOSVE-NEXT: add w9, w0, w0
; NONEON-NOSVE-NEXT: strh w8, [sp, #132]
; NONEON-NOSVE-NEXT: add w8, w18, w18
; NONEON-NOSVE-NEXT: ldr w12, [sp, #48]
; NONEON-NOSVE-NEXT: ldr w13, [sp, #56]
; NONEON-NOSVE-NEXT: strh w9, [sp, #130]
; NONEON-NOSVE-NEXT: add w9, w17, w17
; NONEON-NOSVE-NEXT: strh w8, [sp, #128]
; NONEON-NOSVE-NEXT: add w8, w16, w16
; NONEON-NOSVE-NEXT: ldr w10, [sp, #112]
; NONEON-NOSVE-NEXT: ldr w11, [sp, #120]
; NONEON-NOSVE-NEXT: strh w9, [sp, #158]
; NONEON-NOSVE-NEXT: add w9, w15, w15
; NONEON-NOSVE-NEXT: strh w8, [sp, #156]
; NONEON-NOSVE-NEXT: add w8, w14, w14
; NONEON-NOSVE-NEXT: strh w9, [sp, #154]
; NONEON-NOSVE-NEXT: add w9, w13, w13
; NONEON-NOSVE-NEXT: strh w8, [sp, #152]
; NONEON-NOSVE-NEXT: add w8, w12, w12
; NONEON-NOSVE-NEXT: strh w9, [sp, #150]
; NONEON-NOSVE-NEXT: add w9, w11, w11
; NONEON-NOSVE-NEXT: strh w8, [sp, #148]
; NONEON-NOSVE-NEXT: add w8, w10, w10
; NONEON-NOSVE-NEXT: strh w9, [sp, #146]
; NONEON-NOSVE-NEXT: strh w8, [sp, #144]
; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #128]
; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #160
; NONEON-NOSVE-NEXT: ret
  %a = load <16 x i64>, ptr %in
  %b = trunc <16 x i64> %a to <16 x i16>
  %c = add <16 x i16> %b, %b
  store <16 x i16> %c, ptr %out
  ret void
}

; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v32i64_v32i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #160] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ldp q4, q5, [x0, #96] ; CHECK-NEXT: ldp q6, q7, [x0] ; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s ; CHECK-NEXT: ldp q3, q18, [x0, #128] ; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s ; CHECK-NEXT: ldp q2, q19, [x0, #192] ; CHECK-NEXT: ldp q0, q1, [x0, #64] ; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s ; CHECK-NEXT: ldp q18, q22, [x0, #224] ; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s ; CHECK-NEXT: ldp q3, q23, [x0, #32] ; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s } ; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s ; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s ; CHECK-NEXT: uzp1 z26.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s ; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s ; CHECK-NEXT: uzp1 z23.s, z5.s, z5.s ; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s ; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s ; CHECK-NEXT: uzp1 z22.s, z4.s, z4.s ; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s ; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s ; CHECK-NEXT: splice z1.s, p0, { z20.s, z21.s } ; CHECK-NEXT: splice z6.s, p0, { z24.s, z25.s } ; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s ; CHECK-NEXT: splice z0.s, p0, { z26.s, z27.s } ; CHECK-NEXT: splice z7.s, p0, { z17.s, z18.s } ; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h ; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s } ; CHECK-NEXT: splice z3.s, p0, { z22.s, z23.s } ; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s } ; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h ; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h ; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h ; CHECK-NEXT: splice z7.h, p0, { z16.h, z17.h } ; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h ; CHECK-NEXT: splice z4.h, p0, { z5.h, z6.h } ; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h } ; CHECK-NEXT: add z2.h, z7.h, z7.h ; CHECK-NEXT: add z3.h, z4.h, z4.h ; 
CHECK-NEXT: add z0.h, z0.h, z0.h ; CHECK-NEXT: add z1.h, z1.h, z1.h ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #432 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] ; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #352] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #64] ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #368] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #384] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #128] ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #400] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32] ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #416] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] ; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #336] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #224] ; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #192] ; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #160] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #144] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #168] ; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #176] ; NONEON-NOSVE-NEXT: stp q21, q19, [sp, #32] ; NONEON-NOSVE-NEXT: ldr w25, [sp, #192] ; NONEON-NOSVE-NEXT: ldr w26, [sp, #200] ; NONEON-NOSVE-NEXT: add w6, w8, w8 ; NONEON-NOSVE-NEXT: add w5, w9, w9 ; NONEON-NOSVE-NEXT: ldr w9, [sp, #176] ; NONEON-NOSVE-NEXT: stp q20, q23, [sp, #80] ; NONEON-NOSVE-NEXT: ldr w2, [sp, #48] ; NONEON-NOSVE-NEXT: ldr w3, [sp, #56] ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: ldr w18, [sp, #80] ; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #112] ; NONEON-NOSVE-NEXT: stp q6, q5, [sp, #208] ; NONEON-NOSVE-NEXT: ldr w0, [sp, #88] ; NONEON-NOSVE-NEXT: ldr w16, [sp, #32] ; NONEON-NOSVE-NEXT: stp q3, q17, [sp, #240] ; NONEON-NOSVE-NEXT: ldr w23, [sp, #224] ; NONEON-NOSVE-NEXT: ldr w24, [sp, #232] ; NONEON-NOSVE-NEXT: ldr w10, [sp, #256] ; NONEON-NOSVE-NEXT: 
ldr w8, [sp, #264] ; NONEON-NOSVE-NEXT: ldr w27, [sp, #240] ; NONEON-NOSVE-NEXT: ldr w28, [sp, #248] ; NONEON-NOSVE-NEXT: strh w9, [sp, #308] ; NONEON-NOSVE-NEXT: ldr w21, [sp, #208] ; NONEON-NOSVE-NEXT: add w9, w27, w27 ; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] ; NONEON-NOSVE-NEXT: str q7, [sp, #16] ; NONEON-NOSVE-NEXT: ldr w22, [sp, #216] ; NONEON-NOSVE-NEXT: ldr w17, [sp, #40] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w9, [sp, #304] ; NONEON-NOSVE-NEXT: add w9, w25, w25 ; NONEON-NOSVE-NEXT: strh w8, [sp, #310] ; NONEON-NOSVE-NEXT: add w8, w28, w28 ; NONEON-NOSVE-NEXT: ldr w19, [sp, #16] ; NONEON-NOSVE-NEXT: strh w8, [sp, #306] ; NONEON-NOSVE-NEXT: add w8, w26, w26 ; NONEON-NOSVE-NEXT: ldr w20, [sp, #24] ; NONEON-NOSVE-NEXT: str q18, [sp, #64] ; NONEON-NOSVE-NEXT: ldr w14, [sp, #112] ; NONEON-NOSVE-NEXT: ldr w15, [sp, #120] ; NONEON-NOSVE-NEXT: strh w8, [sp, #302] ; NONEON-NOSVE-NEXT: add w8, w24, w24 ; NONEON-NOSVE-NEXT: ldr w4, [sp, #64] ; NONEON-NOSVE-NEXT: strh w9, [sp, #300] ; NONEON-NOSVE-NEXT: add w9, w23, w23 ; NONEON-NOSVE-NEXT: ldr w7, [sp, #72] ; NONEON-NOSVE-NEXT: strh w8, [sp, #298] ; NONEON-NOSVE-NEXT: add w8, w22, w22 ; NONEON-NOSVE-NEXT: ldr w12, [sp, #96] ; NONEON-NOSVE-NEXT: strh w9, [sp, #296] ; NONEON-NOSVE-NEXT: add w9, w21, w21 ; NONEON-NOSVE-NEXT: ldr w13, [sp, #104] ; NONEON-NOSVE-NEXT: strh w8, [sp, #294] ; NONEON-NOSVE-NEXT: add w8, w20, w20 ; NONEON-NOSVE-NEXT: ldr w10, [sp, #128] ; NONEON-NOSVE-NEXT: strh w9, [sp, #292] ; NONEON-NOSVE-NEXT: add w9, w19, w19 ; NONEON-NOSVE-NEXT: ldr w11, [sp, #136] ; NONEON-NOSVE-NEXT: strh w8, [sp, #290] ; NONEON-NOSVE-NEXT: add w8, w7, w7 ; NONEON-NOSVE-NEXT: ldr w29, [sp, #144] ; NONEON-NOSVE-NEXT: strh w9, [sp, #288] ; NONEON-NOSVE-NEXT: add w9, w4, w4 ; NONEON-NOSVE-NEXT: ldr w30, [sp, #152] ; NONEON-NOSVE-NEXT: strh w8, [sp, #286] ; NONEON-NOSVE-NEXT: add w8, w3, w3 ; NONEON-NOSVE-NEXT: strh w9, [sp, #284] ; 
NONEON-NOSVE-NEXT: add w9, w2, w2 ; NONEON-NOSVE-NEXT: strh w8, [sp, #282] ; NONEON-NOSVE-NEXT: add w8, w0, w0 ; NONEON-NOSVE-NEXT: strh w9, [sp, #280] ; NONEON-NOSVE-NEXT: add w9, w18, w18 ; NONEON-NOSVE-NEXT: strh w8, [sp, #278] ; NONEON-NOSVE-NEXT: add w8, w17, w17 ; NONEON-NOSVE-NEXT: strh w9, [sp, #276] ; NONEON-NOSVE-NEXT: add w9, w16, w16 ; NONEON-NOSVE-NEXT: strh w8, [sp, #274] ; NONEON-NOSVE-NEXT: add w8, w15, w15 ; NONEON-NOSVE-NEXT: strh w9, [sp, #272] ; NONEON-NOSVE-NEXT: add w9, w14, w14 ; NONEON-NOSVE-NEXT: strh w8, [sp, #334] ; NONEON-NOSVE-NEXT: add w8, w13, w13 ; NONEON-NOSVE-NEXT: strh w9, [sp, #332] ; NONEON-NOSVE-NEXT: add w9, w12, w12 ; NONEON-NOSVE-NEXT: strh w8, [sp, #330] ; NONEON-NOSVE-NEXT: add w8, w11, w11 ; NONEON-NOSVE-NEXT: strh w9, [sp, #328] ; NONEON-NOSVE-NEXT: add w9, w10, w10 ; NONEON-NOSVE-NEXT: strh w8, [sp, #326] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: strh w9, [sp, #324] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: strh w5, [sp, #318] ; NONEON-NOSVE-NEXT: add w5, w30, w30 ; NONEON-NOSVE-NEXT: strh w6, [sp, #316] ; NONEON-NOSVE-NEXT: add w6, w29, w29 ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: strh w5, [sp, #314] ; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #272] ; NONEON-NOSVE-NEXT: strh w6, [sp, #312] ; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #416] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strh w8, [sp, #322] ; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #400] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: strh w9, [sp, #320] ; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #384] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #304] ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #368] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #352] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q3, q2, [x1] ; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #336] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp 
q0, q1, [x1, #32] ; NONEON-NOSVE-NEXT: add sp, sp, #432 ; NONEON-NOSVE-NEXT: ret %a = load <32 x i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i16> %c = add <32 x i16> %b, %b store <32 x i16> %c, ptr %out ret void } ; ; truncate i64 -> i32 ;
; trunc_v4i64_v4i32: loads a <4 x i64> from %in, truncates it to <4 x i32> and
; returns the result directly (no store, so no extra 'add' is used here).
; Assertions under the CHECK prefix belong to the +sve2 and +sme RUN lines;
; those under the NONEON-NOSVE prefix belong to the RUN line without -mattr.
define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind { ; CHECK-LABEL: trunc_v4i64_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s ; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]! ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] ; NONEON-NOSVE-NEXT: ldp x8, x10, [sp] ; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32] ; NONEON-NOSVE-NEXT: add sp, sp, #48 ; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in %b = trunc <4 x i64> %a to <4 x i32> ret <4 x i32> %b } ; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; trunc_v8i64_v8i32: loads a <8 x i64> from %in, truncates it to <8 x i32>,
; adds the truncated vector to itself and stores the sum to %out.
; Assertions under the CHECK prefix belong to the +sve2 and +sme RUN lines;
; those under the NONEON-NOSVE prefix belong to the RUN line without -mattr.
define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v8i64_v8i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s ; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s ; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s } ; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } ; CHECK-NEXT: add z1.s, z2.s, z2.s ; CHECK-NEXT: add z0.s, z0.s, z0.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] ; NONEON-NOSVE-NEXT: stp q3, q1, [sp] ; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] ; NONEON-NOSVE-NEXT: ldr w12, [sp] ; NONEON-NOSVE-NEXT: ldr w13, [sp, #8] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #40] ; NONEON-NOSVE-NEXT: ldr w14, [sp, #16] ; NONEON-NOSVE-NEXT: ldr w15, [sp, #24] ; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] ; NONEON-NOSVE-NEXT: ldr w11, [sp, #56] ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] ; NONEON-NOSVE-NEXT: add w9, w13, w13 ; NONEON-NOSVE-NEXT: add w8, w12, w12 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] ; NONEON-NOSVE-NEXT: add w9, w15, w15 ; NONEON-NOSVE-NEXT: add w8, w14, w14 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] ; NONEON-NOSVE-NEXT: add w9, w11, w11 ; NONEON-NOSVE-NEXT: add w8, w10, w10 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] ; NONEON-NOSVE-NEXT: add sp, sp, #96 ; NONEON-NOSVE-NEXT: ret %a = load <8 x i64>, ptr %in %b = trunc <8 x i64> %a to <8 x i32> %c = add <8 x i32> %b, %b store <8 x i32> %c, ptr %out ret void } ; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; trunc_v16i64_v16i32: loads a <16 x i64> from %in, truncates it to <16 x i32>,
; adds the truncated vector to itself and stores the sum to %out.
; Assertions under the CHECK prefix belong to the +sve2 and +sme RUN lines;
; those under the NONEON-NOSVE prefix belong to the RUN line without -mattr.
define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v16i64_v16i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0, #64] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ldp q2, q3, [x0, #96] ; CHECK-NEXT: ldp q4, q5, [x0] ; CHECK-NEXT: uzp1 z7.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z6.s, z1.s, z1.s ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s ; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s ; CHECK-NEXT: uzp1 z2.s, z4.s, z4.s ; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s ; CHECK-NEXT: splice z0.s, p0, { z6.s, z7.s } ; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s ; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s } ; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s } ; CHECK-NEXT: splice z3.s, p0, { z4.s, z5.s } ; CHECK-NEXT: add z0.s, z0.s, z0.s ; CHECK-NEXT: add z1.s, z1.s, z1.s ; CHECK-NEXT: add z2.s, z2.s, z2.s ; CHECK-NEXT: add z3.s, z3.s, z3.s ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v16i64_v16i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #192 ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96] ; NONEON-NOSVE-NEXT: stp q2, q4, [sp, #64] ; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #96] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #64] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #72] ; NONEON-NOSVE-NEXT: ldr w2, [sp, #96] ; NONEON-NOSVE-NEXT: ldr w3, [sp, #104] ; NONEON-NOSVE-NEXT: stp q5, q7, [sp] ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: ldr w4, [sp, #80] ; NONEON-NOSVE-NEXT: ldr w5, [sp, #88] ; NONEON-NOSVE-NEXT: stp q6, q0, [sp, #32] ; NONEON-NOSVE-NEXT: ldr w18, [sp] ; NONEON-NOSVE-NEXT: ldr w0, [sp, #8] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #168] ; NONEON-NOSVE-NEXT: add w9, w3, w3 ; NONEON-NOSVE-NEXT: add w8, w2, w2 ; NONEON-NOSVE-NEXT: ldr w16, [sp, #32] ; NONEON-NOSVE-NEXT: ldr w17, 
[sp, #40] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #160] ; NONEON-NOSVE-NEXT: add w9, w5, w5 ; NONEON-NOSVE-NEXT: add w8, w4, w4 ; NONEON-NOSVE-NEXT: ldr w14, [sp, #16] ; NONEON-NOSVE-NEXT: ldr w15, [sp, #24] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #152] ; NONEON-NOSVE-NEXT: add w9, w0, w0 ; NONEON-NOSVE-NEXT: add w8, w18, w18 ; NONEON-NOSVE-NEXT: ldr w12, [sp, #48] ; NONEON-NOSVE-NEXT: ldr w13, [sp, #56] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #144] ; NONEON-NOSVE-NEXT: add w9, w17, w17 ; NONEON-NOSVE-NEXT: add w8, w16, w16 ; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] ; NONEON-NOSVE-NEXT: ldr w11, [sp, #120] ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #136] ; NONEON-NOSVE-NEXT: add w9, w15, w15 ; NONEON-NOSVE-NEXT: add w8, w14, w14 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #128] ; NONEON-NOSVE-NEXT: add w9, w13, w13 ; NONEON-NOSVE-NEXT: add w8, w12, w12 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #184] ; NONEON-NOSVE-NEXT: add w9, w11, w11 ; NONEON-NOSVE-NEXT: add w8, w10, w10 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #176] ; NONEON-NOSVE-NEXT: ldp q1, q3, [sp, #128] ; NONEON-NOSVE-NEXT: ldp q2, q0, [sp, #160] ; NONEON-NOSVE-NEXT: stp q3, q2, [x1] ; NONEON-NOSVE-NEXT: stp q0, q1, [x1, #32] ; NONEON-NOSVE-NEXT: add sp, sp, #192 ; NONEON-NOSVE-NEXT: ret %a = load <16 x i64>, ptr %in %b = trunc <16 x i64> %a to <16 x i32> %c = add <16 x i32> %b, %b store <16 x i32> %c, ptr %out ret void } ; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; trunc_v32i64_v32i32: loads a <32 x i64> from %in, truncates it to <32 x i32>,
; adds the truncated vector to itself and stores the sum to %out.
; Assertions under the CHECK prefix belong to the +sve2 and +sme RUN lines;
; those under the NONEON-NOSVE prefix belong to the RUN line without -mattr.
define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v32i64_v32i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #192] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ldp q4, q5, [x0] ; CHECK-NEXT: ldp q6, q7, [x0, #64] ; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s ; CHECK-NEXT: ldp q3, q18, [x0, #224] ; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s ; CHECK-NEXT: ldp q2, q19, [x0, #128] ; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s ; CHECK-NEXT: ldp q18, q22, [x0, #160] ; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s ; CHECK-NEXT: uzp1 z24.s, z19.s, z19.s ; CHECK-NEXT: ldp q3, q19, [x0, #96] ; CHECK-NEXT: uzp1 z23.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z26.s, z22.s, z22.s ; CHECK-NEXT: splice z2.s, p0, { z16.s, z17.s } ; CHECK-NEXT: uzp1 z17.s, z7.s, z7.s ; CHECK-NEXT: uzp1 z25.s, z18.s, z18.s ; CHECK-NEXT: splice z7.s, p0, { z20.s, z21.s } ; CHECK-NEXT: uzp1 z21.s, z5.s, z5.s ; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s ; CHECK-NEXT: uzp1 z20.s, z4.s, z4.s ; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s ; CHECK-NEXT: uzp1 z16.s, z6.s, z6.s ; CHECK-NEXT: splice z6.s, p0, { z23.s, z24.s } ; CHECK-NEXT: uzp1 z18.s, z3.s, z3.s ; CHECK-NEXT: splice z3.s, p0, { z25.s, z26.s } ; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s ; CHECK-NEXT: add z0.s, z2.s, z2.s ; CHECK-NEXT: add z7.s, z7.s, z7.s ; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s } ; CHECK-NEXT: splice z2.s, p0, { z18.s, z19.s } ; CHECK-NEXT: splice z16.s, p0, { z20.s, z21.s } ; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s } ; CHECK-NEXT: add z6.s, z6.s, z6.s ; CHECK-NEXT: add z3.s, z3.s, z3.s ; CHECK-NEXT: stp q0, q7, [x1, #96] ; CHECK-NEXT: add z0.s, z1.s, z1.s ; CHECK-NEXT: add z1.s, z2.s, z2.s ; CHECK-NEXT: add z2.s, z16.s, z16.s ; CHECK-NEXT: stp q6, q3, [x1, #64] ; CHECK-NEXT: add z3.s, z4.s, z4.s ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #496 ; NONEON-NOSVE-NEXT: ldp q1, 
q0, [x0, #32] ; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #416] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #192] ; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #432] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #448] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q23, q22, [x0, #224] ; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #464] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] ; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #480] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #96] ; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #400] // 16-byte Folded Spill ; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] ; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #160] ; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #128] ; NONEON-NOSVE-NEXT: str q0, [sp, #192] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] ; NONEON-NOSVE-NEXT: stp q17, q23, [sp, #32] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #200] ; NONEON-NOSVE-NEXT: ldr w10, [sp, #32] ; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #160] ; NONEON-NOSVE-NEXT: ldr w12, [sp, #48] ; NONEON-NOSVE-NEXT: add w6, w8, w8 ; NONEON-NOSVE-NEXT: add w5, w9, w9 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #40] ; NONEON-NOSVE-NEXT: stp q18, q20, [sp, #112] ; NONEON-NOSVE-NEXT: ldr w25, [sp, #160] ; NONEON-NOSVE-NEXT: ldr w26, [sp, #168] ; NONEON-NOSVE-NEXT: str q5, [sp, #144] ; NONEON-NOSVE-NEXT: ldr w21, [sp, #176] ; NONEON-NOSVE-NEXT: ldr w22, [sp, #184] ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #208] ; NONEON-NOSVE-NEXT: ldr w23, [sp, #144] ; NONEON-NOSVE-NEXT: ldr w24, [sp, #152] ; NONEON-NOSVE-NEXT: str q3, [sp, #16] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #208] ; NONEON-NOSVE-NEXT: ldr w4, [sp, #112] ; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #8] // 8-byte Folded Spill ; NONEON-NOSVE-NEXT: ldr w8, [sp, #216] ; NONEON-NOSVE-NEXT: ldr w27, [sp, #16] ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: ldr w28, [sp, #24] ; NONEON-NOSVE-NEXT: stp q22, q16, [sp, #64] ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: str w9, [sp, #344] ; 
NONEON-NOSVE-NEXT: add w9, w27, w27 ; NONEON-NOSVE-NEXT: str w8, [sp, #348] ; NONEON-NOSVE-NEXT: add w8, w28, w28 ; NONEON-NOSVE-NEXT: ldr w7, [sp, #120] ; NONEON-NOSVE-NEXT: stp q7, q21, [sp, #240] ; NONEON-NOSVE-NEXT: ldr w18, [sp, #128] ; NONEON-NOSVE-NEXT: ldr w0, [sp, #136] ; NONEON-NOSVE-NEXT: str w8, [sp, #340] ; NONEON-NOSVE-NEXT: add w8, w26, w26 ; NONEON-NOSVE-NEXT: ldr w19, [sp, #240] ; NONEON-NOSVE-NEXT: str w9, [sp, #336] ; NONEON-NOSVE-NEXT: add w9, w25, w25 ; NONEON-NOSVE-NEXT: ldr w20, [sp, #248] ; NONEON-NOSVE-NEXT: str w8, [sp, #332] ; NONEON-NOSVE-NEXT: add w8, w24, w24 ; NONEON-NOSVE-NEXT: ldr w16, [sp, #256] ; NONEON-NOSVE-NEXT: str w9, [sp, #328] ; NONEON-NOSVE-NEXT: add w9, w23, w23 ; NONEON-NOSVE-NEXT: ldr w17, [sp, #264] ; NONEON-NOSVE-NEXT: str q19, [sp, #96] ; NONEON-NOSVE-NEXT: ldr w14, [sp, #64] ; NONEON-NOSVE-NEXT: ldr w15, [sp, #72] ; NONEON-NOSVE-NEXT: str w8, [sp, #324] ; NONEON-NOSVE-NEXT: add w8, w22, w22 ; NONEON-NOSVE-NEXT: ldr w2, [sp, #96] ; NONEON-NOSVE-NEXT: str w9, [sp, #320] ; NONEON-NOSVE-NEXT: add w9, w21, w21 ; NONEON-NOSVE-NEXT: ldr w3, [sp, #104] ; NONEON-NOSVE-NEXT: str w8, [sp, #380] ; NONEON-NOSVE-NEXT: add w8, w20, w20 ; NONEON-NOSVE-NEXT: ldr w13, [sp, #56] ; NONEON-NOSVE-NEXT: str w9, [sp, #376] ; NONEON-NOSVE-NEXT: add w9, w19, w19 ; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] ; NONEON-NOSVE-NEXT: str w8, [sp, #372] ; NONEON-NOSVE-NEXT: add w8, w7, w7 ; NONEON-NOSVE-NEXT: ldr w11, [sp, #88] ; NONEON-NOSVE-NEXT: str w9, [sp, #368] ; NONEON-NOSVE-NEXT: add w9, w4, w4 ; NONEON-NOSVE-NEXT: ldr w29, [sp, #224] ; NONEON-NOSVE-NEXT: str w8, [sp, #316] ; NONEON-NOSVE-NEXT: add w8, w3, w3 ; NONEON-NOSVE-NEXT: ldr w30, [sp, #232] ; NONEON-NOSVE-NEXT: str w9, [sp, #312] ; NONEON-NOSVE-NEXT: add w9, w2, w2 ; NONEON-NOSVE-NEXT: str w8, [sp, #308] ; NONEON-NOSVE-NEXT: add w8, w0, w0 ; NONEON-NOSVE-NEXT: str w9, [sp, #304] ; NONEON-NOSVE-NEXT: add w9, w18, w18 ; NONEON-NOSVE-NEXT: str w8, [sp, #396] ; NONEON-NOSVE-NEXT: add w8, 
w17, w17 ; NONEON-NOSVE-NEXT: str w9, [sp, #392] ; NONEON-NOSVE-NEXT: add w9, w16, w16 ; NONEON-NOSVE-NEXT: str w8, [sp, #388] ; NONEON-NOSVE-NEXT: add w8, w15, w15 ; NONEON-NOSVE-NEXT: str w9, [sp, #384] ; NONEON-NOSVE-NEXT: add w9, w14, w14 ; NONEON-NOSVE-NEXT: str w8, [sp, #284] ; NONEON-NOSVE-NEXT: add w8, w13, w13 ; NONEON-NOSVE-NEXT: str w9, [sp, #280] ; NONEON-NOSVE-NEXT: add w9, w12, w12 ; NONEON-NOSVE-NEXT: str w8, [sp, #276] ; NONEON-NOSVE-NEXT: add w8, w11, w11 ; NONEON-NOSVE-NEXT: str w9, [sp, #272] ; NONEON-NOSVE-NEXT: add w9, w10, w10 ; NONEON-NOSVE-NEXT: str w8, [sp, #300] ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: str w9, [sp, #296] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload ; NONEON-NOSVE-NEXT: add w8, w8, w8 ; NONEON-NOSVE-NEXT: str w5, [sp, #364] ; NONEON-NOSVE-NEXT: add w5, w30, w30 ; NONEON-NOSVE-NEXT: add w9, w9, w9 ; NONEON-NOSVE-NEXT: str w6, [sp, #360] ; NONEON-NOSVE-NEXT: add w6, w29, w29 ; NONEON-NOSVE-NEXT: str w5, [sp, #356] ; NONEON-NOSVE-NEXT: ldp q6, q3, [sp, #304] ; NONEON-NOSVE-NEXT: str w6, [sp, #352] ; NONEON-NOSVE-NEXT: ldp q4, q7, [sp, #368] ; NONEON-NOSVE-NEXT: str w8, [sp, #292] ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #336] ; NONEON-NOSVE-NEXT: str w9, [sp, #288] ; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #480] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp q5, q2, [sp, #272] ; NONEON-NOSVE-NEXT: stp q4, q3, [x1, #32] ; NONEON-NOSVE-NEXT: stp q1, q0, [x1] ; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #464] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q7, q6, [x1, #64] ; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #448] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #96] ; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #432] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #416] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #400] // 16-byte Folded Reload ; NONEON-NOSVE-NEXT: add sp, sp, #496 ; NONEON-NOSVE-NEXT: ret %a = load <32 x 
i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i32> %c = add <32 x i32> %b, %b store <32 x i32> %c, ptr %out ret void }