// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include // CHECK-LABEL: @test_svread_ver_za8_u8_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg2(0, base); } // CHECK-LABEL: @test_svread_ver_za8_s8_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail 
call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg2(0, base); } // CHECK-LABEL: @test_svread_hor_za8_u8_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg2(0, base); } // CHECK-LABEL: @test_svread_hor_za8_s8_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg2(0, base); } // CHECK-LABEL: @test_svread_hor_za8_u8_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg4(0, base); } // CHECK-LABEL: @test_svread_hor_za8_s8_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } 
[[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg4(0, base); } // CHECK-LABEL: @test_svread_ver_za8_u8_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = 
tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg4(0, base); } // CHECK-LABEL: @test_svread_ver_za8_s8_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg4(0, base); } // CHECK-LABEL: @test_svread_hor_za16_u16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], 
[[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg2(1, base); } // CHECK-LABEL: @test_svread_hor_za16_bf16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_hor_za16_bf16_vg2(1, base); } // CHECK-LABEL: @test_svread_hor_za16_f16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg2(1, base); } // CHECK-LABEL: @test_svread_hor_za16_s16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg2(1, base); } // CHECK-LABEL: @test_svread_ver_za16_u16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg2(1, base); } // CHECK-LABEL: @test_svread_ver_za16_bf16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 
[[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg2(1, base); } // CHECK-LABEL: @test_svread_ver_za16_f16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], 
i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg2(1, base); } // CHECK-LABEL: @test_svread_ver_za16_s16_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg2(1, base); } // CHECK-LABEL: @test_svread_hor_za16_u16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , 
, } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg4(1, base); } // CHECK-LABEL: @test_svread_hor_za16_bf16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg4(1, base); } // CHECK-LABEL: @test_svread_hor_za16_f16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg4(1, base); } // CHECK-LABEL: @test_svread_hor_za16_s16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg4(1, base); } // CHECK-LABEL: @test_svread_ver_za16_u16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg4(1, base); } // CHECK-LABEL: @test_svread_ver_za16_bf16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg4(1, base); } // CHECK-LABEL: @test_svread_ver_za16_f16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg4(1, base); } // CHECK-LABEL: @test_svread_ver_za16_s16_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg4(1, base); } // CHECK-LABEL: @test_svread_hor_za32_u32_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( 
[[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg2(3, base); } // CHECK-LABEL: @test_svread_hor_za32_f32_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_hor_za32_f32_vg2(3, base); } // CHECK-LABEL: @test_svread_hor_za32_s32_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg2(3, base); } // CHECK-LABEL: @test_svread_ver_za32_u32_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, 
i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg2(3, base); } // CHECK-LABEL: @test_svread_ver_za32_f32_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg2(3, base); } // CHECK-LABEL: @test_svread_ver_za32_s32_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue 
{ , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg2(3, base); } // CHECK-LABEL: @test_svread_hor_za32_u32_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: 
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg4(3, base); } // CHECK-LABEL: @test_svread_hor_za32_f32_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg4(3, base); } // CHECK-LABEL: @test_svread_hor_za32_s32_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { 
, , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg4(3, base); } // CHECK-LABEL: @test_svread_ver_za32_u32_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg4(3, base); } // CHECK-LABEL: @test_svread_ver_za32_f32_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg4(3, base); } // CHECK-LABEL: @test_svread_ver_za32_s32_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg4(3, base); } // CHECK-LABEL: @test_svread_hor_za64_u64_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( 
[[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg2(7, base); } // CHECK-LABEL: @test_svread_hor_za64_f64_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg2(7, base); } // CHECK-LABEL: @test_svread_hor_za64_s64_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: 
@_Z28test_svread_hor_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg2(7, base); } // CHECK-LABEL: @test_svread_ver_za64_u64_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg2(7, base); } // CHECK-LABEL: @test_svread_ver_za64_f64_vg2( // CHECK-NEXT: 
entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg2(7, base); } // CHECK-LABEL: @test_svread_ver_za64_s64_vg2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg2(7, base); } // CHECK-LABEL: @test_svread_hor_za64_u64_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg4(7, base); } // CHECK-LABEL: @test_svread_hor_za64_f64_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg4(7, base); } // CHECK-LABEL: @test_svread_hor_za64_s64_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg4(7, base); } // CHECK-LABEL: @test_svread_ver_za64_u64_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg4(7, base); } // CHECK-LABEL: @test_svread_ver_za64_f64_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // 
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg4(7, base); } // CHECK-LABEL: @test_svread_ver_za64_s64_vg4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail 
call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg4(7, base); } // CHECK-LABEL: @test_svread_za8_s8_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x2(base); } // CHECK-LABEL: @test_svread_za8_u8_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: 
@_Z24test_svread_za8_u8_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x2(base); } // CHECK-LABEL: @test_svread_za16_s16_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x2(base); } // CHECK-LABEL: @test_svread_za16_u16_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x2(base); } // CHECK-LABEL: @test_svread_za16_bf16_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], 
i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x2(base); } // CHECK-LABEL: @test_svread_za16_f16_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x2(base); } // CHECK-LABEL: @test_svread_za32_s32_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail 
call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x2(base); } // CHECK-LABEL: @test_svread_za32_u32_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_za32_u32_vg1x2(base); } // CHECK-LABEL: @test_svread_za32_f32_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x2(base); } // CHECK-LABEL: @test_svread_za64_u64_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x2(base); } // CHECK-LABEL: @test_svread_za64_f64_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x2(base); } // CHECK-LABEL: @test_svread_za64_s64_vg1x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( 
poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x2(base); } // CHECK-LABEL: @test_svread_za8_s8_vg1x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x4(base); } // CHECK-LABEL: @test_svread_za8_u8_vg1x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x4(base); } // CHECK-LABEL: @test_svread_za16_s16_vg1x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x4(base); } // CHECK-LABEL: @test_svread_za16_u16_vg1x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x4(base); } // CHECK-LABEL: @test_svread_za16_bf16_vg1x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CHECK-NEXT: ret [[TMP8]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x4j( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } 
[[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret [[TMP8]]
//
// Forwards `base` to svread_za16_bf16_vg1x4 and returns the svbfloat16x4_t it yields.
svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base)
    __arm_streaming __arm_in("za")
{
  svbfloat16x4_t za_group = svread_za16_bf16_vg1x4(base);
  return za_group;
}

// CHECK-LABEL: @test_svread_za16_f16_vg1x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
// CHECK-NEXT: ret [[TMP8]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x4j(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]]
= tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret [[TMP8]]
//
// Forwards `base` to svread_za16_f16_vg1x4 and returns the svfloat16x4_t it yields.
svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base)
    __arm_streaming __arm_in("za")
{
  svfloat16x4_t za_group = svread_za16_f16_vg1x4(base);
  return za_group;
}

// CHECK-LABEL: @test_svread_za32_s32_vg1x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
// CHECK-NEXT: ret [[TMP8]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x4j(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32(
[[TMP2]], [[TMP3]], i64 4)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret [[TMP8]]
//
// Forwards `base` to svread_za32_s32_vg1x4 and returns the svint32x4_t it yields.
svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base)
    __arm_streaming __arm_in("za")
{
  svint32x4_t za_group = svread_za32_s32_vg1x4(base);
  return za_group;
}

// CHECK-LABEL: @test_svread_za32_u32_vg1x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
// CHECK-NEXT: ret [[TMP8]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x4j(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
// CPP-CHECK-NEXT:
[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret [[TMP8]]
//
// Forwards `base` to svread_za32_u32_vg1x4 and returns the svuint32x4_t it yields.
svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base)
    __arm_streaming __arm_in("za")
{
  svuint32x4_t za_group = svread_za32_u32_vg1x4(base);
  return za_group;
}

// CHECK-LABEL: @test_svread_za32_f32_vg1x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
// CHECK-NEXT: ret [[TMP8]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x4j(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
//
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret [[TMP8]]
//
// Forwards `base` to svread_za32_f32_vg1x4 and returns the svfloat32x4_t it yields.
svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base)
    __arm_streaming __arm_in("za")
{
  svfloat32x4_t za_group = svread_za32_f32_vg1x4(base);
  return za_group;
}

// CHECK-LABEL: @test_svread_za64_u64_vg1x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
// CHECK-NEXT: ret [[TMP8]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
// CPP-CHECK-NEXT: ret [[TMP8]]
//
// Forwards `base` to svread_za64_u64_vg1x4 and returns the svuint64x4_t it yields.
svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base)
    __arm_streaming __arm_in("za")
{
  svuint64x4_t za_group = svread_za64_u64_vg1x4(base);
  return za_group;
}

// CHECK-LABEL: @test_svread_za64_f64_vg1x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
// CHECK-NEXT: ret [[TMP8]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
//
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
// CPP-CHECK-NEXT: ret [[TMP8]]
//
// Forwards `base` to svread_za64_f64_vg1x4 and returns the svfloat64x4_t it yields.
svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base)
    __arm_streaming __arm_in("za")
{
  svfloat64x4_t za_group = svread_za64_f64_vg1x4(base);
  return za_group;
}

// CHECK-LABEL: @test_svread_za64_s64_vg1x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
// CHECK-NEXT: ret [[TMP8]]
//
// CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x4(base); }