Diffstat (limited to 'clang/test/CodeGen/AArch64/neon-across.c')
-rw-r--r--  clang/test/CodeGen/AArch64/neon-across.c | 132
1 file changed, 54 insertions(+), 78 deletions(-)
diff --git a/clang/test/CodeGen/AArch64/neon-across.c b/clang/test/CodeGen/AArch64/neon-across.c
index aa0387d..aae5097 100644
--- a/clang/test/CodeGen/AArch64/neon-across.c
+++ b/clang/test/CodeGen/AArch64/neon-across.c
@@ -113,9 +113,8 @@ uint64_t test_vaddlvq_u32(uint32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXV_S8_I]]
 //
 int8_t test_vmaxv_s8(int8x8_t a) {
   return vmaxv_s8(a);
@@ -124,9 +123,8 @@ int8_t test_vmaxv_s8(int8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXV_S16_I]]
 //
 int16_t test_vmaxv_s16(int16x4_t a) {
   return vmaxv_s16(a);
@@ -135,9 +133,8 @@ int16_t test_vmaxv_s16(int16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXV_U8_I]]
 //
 uint8_t test_vmaxv_u8(uint8x8_t a) {
   return vmaxv_u8(a);
@@ -146,9 +143,8 @@ uint8_t test_vmaxv_u8(uint8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXV_U16_I]]
 //
 uint16_t test_vmaxv_u16(uint16x4_t a) {
   return vmaxv_u16(a);
@@ -157,9 +153,8 @@ uint16_t test_vmaxv_u16(uint16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s8
 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXVQ_S8_I]]
 //
 int8_t test_vmaxvq_s8(int8x16_t a) {
   return vmaxvq_s8(a);
@@ -168,9 +163,8 @@ int8_t test_vmaxvq_s8(int8x16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s16
 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXVQ_S16_I]]
 //
 int16_t test_vmaxvq_s16(int16x8_t a) {
   return vmaxvq_s16(a);
@@ -179,7 +173,7 @@ int16_t test_vmaxvq_s16(int16x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s32
 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXVQ_S32_I]]
 //
 int32_t test_vmaxvq_s32(int32x4_t a) {
@@ -189,9 +183,8 @@ int32_t test_vmaxvq_s32(int32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u8
 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXVQ_U8_I]]
 //
 uint8_t test_vmaxvq_u8(uint8x16_t a) {
   return vmaxvq_u8(a);
@@ -200,9 +193,8 @@ uint8_t test_vmaxvq_u8(uint8x16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u16
 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXVQ_U16_I]]
 //
 uint16_t test_vmaxvq_u16(uint16x8_t a) {
   return vmaxvq_u16(a);
@@ -211,7 +203,7 @@ uint16_t test_vmaxvq_u16(uint16x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u32
 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXVQ_U32_I]]
 //
 uint32_t test_vmaxvq_u32(uint32x4_t a) {
@@ -221,9 +213,8 @@ uint32_t test_vmaxvq_u32(uint32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_s8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINV_S8_I]]
 //
 int8_t test_vminv_s8(int8x8_t a) {
   return vminv_s8(a);
@@ -232,9 +223,8 @@ int8_t test_vminv_s8(int8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_s16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINV_S16_I]]
 //
 int16_t test_vminv_s16(int16x4_t a) {
   return vminv_s16(a);
@@ -243,9 +233,8 @@ int16_t test_vminv_s16(int16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_u8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINV_U8_I]]
 //
 uint8_t test_vminv_u8(uint8x8_t a) {
   return vminv_u8(a);
@@ -254,9 +243,8 @@ uint8_t test_vminv_u8(uint8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_u16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINV_U16_I]]
 //
 uint16_t test_vminv_u16(uint16x4_t a) {
   return vminv_u16(a);
@@ -265,9 +253,8 @@ uint16_t test_vminv_u16(uint16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s8
 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINVQ_S8_I]]
 //
 int8_t test_vminvq_s8(int8x16_t a) {
   return vminvq_s8(a);
@@ -276,9 +263,8 @@ int8_t test_vminvq_s8(int8x16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s16
 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINVQ_S16_I]]
 //
 int16_t test_vminvq_s16(int16x8_t a) {
   return vminvq_s16(a);
@@ -287,7 +273,7 @@ int16_t test_vminvq_s16(int16x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s32
 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINVQ_S32_I]]
 //
 int32_t test_vminvq_s32(int32x4_t a) {
@@ -297,9 +283,8 @@ int32_t test_vminvq_s32(int32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u8
 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINVQ_U8_I]]
 //
 uint8_t test_vminvq_u8(uint8x16_t a) {
   return vminvq_u8(a);
@@ -308,9 +293,8 @@ uint8_t test_vminvq_u8(uint8x16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u16
 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINVQ_U16_I]]
 //
 uint16_t test_vminvq_u16(uint16x8_t a) {
   return vminvq_u16(a);
@@ -319,7 +303,7 @@ uint16_t test_vminvq_u16(uint16x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u32
 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINVQ_U32_I]]
 //
 uint32_t test_vminvq_u32(uint32x4_t a) {
@@ -329,9 +313,8 @@ uint32_t test_vminvq_u32(uint32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDV_S8_I]]
 //
 int8_t test_vaddv_s8(int8x8_t a) {
   return vaddv_s8(a);
@@ -340,9 +323,8 @@ int8_t test_vaddv_s8(int8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDV_S16_I]]
 //
 int16_t test_vaddv_s16(int16x4_t a) {
   return vaddv_s16(a);
@@ -351,9 +333,8 @@ int16_t test_vaddv_s16(int16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDV_U8_I]]
 //
 uint8_t test_vaddv_u8(uint8x8_t a) {
   return vaddv_u8(a);
@@ -362,9 +343,8 @@ uint8_t test_vaddv_u8(uint8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDV_U16_I]]
 //
 uint16_t test_vaddv_u16(uint16x4_t a) {
   return vaddv_u16(a);
@@ -373,9 +353,8 @@ uint16_t test_vaddv_u16(uint16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s8
 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDVQ_S8_I]]
 //
 int8_t test_vaddvq_s8(int8x16_t a) {
   return vaddvq_s8(a);
@@ -384,9 +363,8 @@ int8_t test_vaddvq_s8(int8x16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s16
 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDVQ_S16_I]]
 //
 int16_t test_vaddvq_s16(int16x8_t a) {
   return vaddvq_s16(a);
@@ -395,7 +373,7 @@ int16_t test_vaddvq_s16(int16x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s32
 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDVQ_S32_I]]
 //
 int32_t test_vaddvq_s32(int32x4_t a) {
@@ -405,9 +383,8 @@ int32_t test_vaddvq_s32(int32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u8
 // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDVQ_U8_I]]
 //
 uint8_t test_vaddvq_u8(uint8x16_t a) {
   return vaddvq_u8(a);
@@ -416,9 +393,8 @@ uint8_t test_vaddvq_u8(uint8x16_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u16
 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDVQ_U16_I]]
 //
 uint16_t test_vaddvq_u16(uint16x8_t a) {
   return vaddvq_u16(a);
@@ -427,7 +403,7 @@ uint16_t test_vaddvq_u16(uint16x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u32
 // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDVQ_U32_I]]
 //
 uint32_t test_vaddvq_u32(uint32x4_t a) {
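Context for the diff above (not part of the patch): the across-vector NEON intrinsics reduce a whole vector register to one scalar, and the updated CHECK lines show Clang now lowering them to the generic llvm.vector.reduce.* intrinsics at the element type, instead of the target-specific llvm.aarch64.neon.*v.i32.* calls followed by a trunc. A minimal usage sketch in C, assuming an AArch64 target such as clang --target=aarch64-linux-gnu; the driver file and its values are illustrative only:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  int8x8_t v = {3, -1, 7, 0, -5, 2, 7, 1};
  // Per the new CHECK lines, vmaxv_s8 now emits
  //   call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
  // rather than an i32 llvm.aarch64.neon.smaxv call plus a trunc to i8.
  printf("max = %d\n", vmaxv_s8(v)); // prints 7
  printf("min = %d\n", vminv_s8(v)); // prints -5
  printf("sum = %d\n", vaddv_s8(v)); // prints 14
  return 0;
}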
