diff options
Diffstat (limited to 'clang/test/CodeGen/arm-mve-intrinsics/load-store.c')
-rw-r--r-- | clang/test/CodeGen/arm-mve-intrinsics/load-store.c | 420 |
1 files changed, 210 insertions, 210 deletions
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c index 2dde75f..50f7011 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c @@ -8,8 +8,8 @@ // CHECK-LABEL: @test_vld1q_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x half> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vld1q_f16(const float16_t *base) { @@ -22,8 +22,8 @@ float16x8_t test_vld1q_f16(const float16_t *base) // CHECK-LABEL: @test_vld1q_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x float> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vld1q_f32(const float32_t *base) { @@ -36,8 +36,8 @@ float32x4_t test_vld1q_f32(const float32_t *base) // CHECK-LABEL: @test_vld1q_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vld1q_s8(const int8_t *base) { @@ -50,8 +50,8 @@ int8x16_t test_vld1q_s8(const int8_t *base) // CHECK-LABEL: @test_vld1q_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vld1q_s16(const int16_t *base) { @@ -64,8 +64,8 @@ int16x8_t test_vld1q_s16(const int16_t *base) // CHECK-LABEL: @test_vld1q_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vld1q_s32(const int32_t *base) { @@ -78,8 +78,8 @@ int32x4_t test_vld1q_s32(const int32_t *base) // CHECK-LABEL: @test_vld1q_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vld1q_u8(const uint8_t *base) { @@ -92,8 +92,8 @@ uint8x16_t test_vld1q_u8(const uint8_t *base) // CHECK-LABEL: @test_vld1q_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vld1q_u16(const uint16_t *base) { @@ -106,8 +106,8 @@ uint16x8_t test_vld1q_u16(const uint16_t *base) // CHECK-LABEL: @test_vld1q_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vld1q_u32(const uint32_t *base) { @@ -120,10 +120,10 @@ uint32x4_t test_vld1q_u32(const uint32_t *base) // CHECK-LABEL: @test_vld1q_z_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer) -// CHECK-NEXT: ret <8 x half> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x half> zeroinitializer) +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vld1q_z_f16(const float16_t *base, mve_pred16_t p) { @@ -136,10 +136,10 @@ float16x8_t test_vld1q_z_f16(const float16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer) -// CHECK-NEXT: ret <4 x float> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x float> zeroinitializer) +// CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vld1q_z_f32(const float32_t *base, mve_pred16_t p) { @@ -152,10 +152,10 @@ float32x4_t test_vld1q_z_f32(const float32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vld1q_z_s8(const int8_t *base, mve_pred16_t p) { @@ -168,10 +168,10 @@ int8x16_t test_vld1q_z_s8(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vld1q_z_s16(const int16_t *base, mve_pred16_t p) { @@ -184,10 +184,10 @@ int16x8_t test_vld1q_z_s16(const int16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vld1q_z_s32(const int32_t *base, mve_pred16_t p) { @@ -200,10 +200,10 @@ int32x4_t test_vld1q_z_s32(const int32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vld1q_z_u8(const uint8_t *base, mve_pred16_t p) { @@ -216,10 +216,10 @@ uint8x16_t test_vld1q_z_u8(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vld1q_z_u16(const uint16_t *base, mve_pred16_t p) { @@ -232,10 +232,10 @@ uint16x8_t test_vld1q_z_u16(const uint16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vld1q_z_u32(const uint32_t *base, mve_pred16_t p) { @@ -248,8 +248,8 @@ uint32x4_t test_vld1q_z_u32(const uint32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vldrbq_s8(const int8_t *base) { @@ -258,9 +258,9 @@ int8x16_t test_vldrbq_s8(const int8_t *base) // CHECK-LABEL: @test_vldrbq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP1]] // int16x8_t test_vldrbq_s16(const int8_t *base) { @@ -269,9 +269,9 @@ int16x8_t test_vldrbq_s16(const int8_t *base) // CHECK-LABEL: @test_vldrbq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // int32x4_t test_vldrbq_s32(const int8_t *base) { @@ -280,8 +280,8 @@ int32x4_t test_vldrbq_s32(const int8_t *base) // CHECK-LABEL: @test_vldrbq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vldrbq_u8(const uint8_t *base) { @@ -290,9 +290,9 @@ uint8x16_t test_vldrbq_u8(const uint8_t *base) // CHECK-LABEL: @test_vldrbq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP1]] // uint16x8_t test_vldrbq_u16(const uint8_t *base) { @@ -301,9 +301,9 @@ uint16x8_t test_vldrbq_u16(const uint8_t *base) // CHECK-LABEL: @test_vldrbq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // uint32x4_t test_vldrbq_u32(const uint8_t *base) { @@ -312,10 +312,10 @@ uint32x4_t test_vldrbq_u32(const uint8_t *base) // CHECK-LABEL: @test_vldrbq_z_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vldrbq_z_s8(const int8_t *base, mve_pred16_t p) { @@ -324,11 +324,11 @@ int8x16_t test_vldrbq_z_s8(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[TMP3]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP3]] // int16x8_t test_vldrbq_z_s16(const int8_t *base, mve_pred16_t p) { @@ -337,11 +337,11 @@ int16x8_t test_vldrbq_z_s16(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // int32x4_t test_vldrbq_z_s32(const int8_t *base, mve_pred16_t p) { @@ -350,10 +350,10 @@ int32x4_t test_vldrbq_z_s32(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vldrbq_z_u8(const uint8_t *base, mve_pred16_t p) { @@ -362,11 +362,11 @@ uint8x16_t test_vldrbq_z_u8(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP3]] // uint16x8_t test_vldrbq_z_u16(const uint8_t *base, mve_pred16_t p) { @@ -375,11 +375,11 @@ uint16x8_t test_vldrbq_z_u16(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // uint32x4_t test_vldrbq_z_u32(const uint8_t *base, mve_pred16_t p) { @@ -388,8 +388,8 @@ uint32x4_t test_vldrbq_z_u32(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x half> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vldrhq_f16(const float16_t *base) { @@ -398,8 +398,8 @@ float16x8_t test_vldrhq_f16(const float16_t *base) // CHECK-LABEL: @test_vldrhq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vldrhq_s16(const int16_t *base) { @@ -408,9 +408,9 @@ int16x8_t test_vldrhq_s16(const int16_t *base) // CHECK-LABEL: @test_vldrhq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // int32x4_t test_vldrhq_s32(const int16_t *base) { @@ -419,8 +419,8 @@ int32x4_t test_vldrhq_s32(const int16_t *base) // CHECK-LABEL: @test_vldrhq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vldrhq_u16(const uint16_t *base) { @@ -429,9 +429,9 @@ uint16x8_t test_vldrhq_u16(const uint16_t *base) // CHECK-LABEL: @test_vldrhq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // uint32x4_t test_vldrhq_u32(const uint16_t *base) { @@ -440,10 +440,10 @@ uint32x4_t test_vldrhq_u32(const uint16_t *base) // CHECK-LABEL: @test_vldrhq_z_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer) -// CHECK-NEXT: ret <8 x half> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x half> zeroinitializer) +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vldrhq_z_f16(const float16_t *base, mve_pred16_t p) { @@ -452,10 +452,10 @@ float16x8_t test_vldrhq_z_f16(const float16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vldrhq_z_s16(const int16_t *base, mve_pred16_t p) { @@ -464,11 +464,11 @@ int16x8_t test_vldrhq_z_s16(const int16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP1]], <4 x i16> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // int32x4_t test_vldrhq_z_s32(const int16_t *base, mve_pred16_t p) { @@ -477,10 +477,10 @@ int32x4_t test_vldrhq_z_s32(const int16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vldrhq_z_u16(const uint16_t *base, mve_pred16_t p) { @@ -489,11 +489,11 @@ uint16x8_t test_vldrhq_z_u16(const uint16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP1]], <4 x i16> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // uint32x4_t test_vldrhq_z_u32(const uint16_t *base, mve_pred16_t p) { @@ -502,8 +502,8 @@ uint32x4_t test_vldrhq_z_u32(const uint16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrwq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x float> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vldrwq_f32(const float32_t *base) { @@ -512,8 +512,8 @@ float32x4_t test_vldrwq_f32(const float32_t *base) // CHECK-LABEL: @test_vldrwq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vldrwq_s32(const int32_t *base) { @@ -522,8 +522,8 @@ int32x4_t test_vldrwq_s32(const int32_t *base) // CHECK-LABEL: @test_vldrwq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vldrwq_u32(const uint32_t *base) { @@ -532,10 +532,10 @@ uint32x4_t test_vldrwq_u32(const uint32_t *base) // CHECK-LABEL: @test_vldrwq_z_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer) -// CHECK-NEXT: ret <4 x float> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x float> zeroinitializer) +// CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vldrwq_z_f32(const float32_t *base, mve_pred16_t p) { @@ -544,10 +544,10 @@ float32x4_t test_vldrwq_z_f32(const float32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrwq_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vldrwq_z_s32(const int32_t *base, mve_pred16_t p) { @@ -556,10 +556,10 @@ int32x4_t test_vldrwq_z_s32(const int32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrwq_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vldrwq_z_u32(const uint32_t *base, mve_pred16_t p) { @@ -680,9 +680,9 @@ void test_vst1q_u32(uint32_t *base, uint32x4_t value) // CHECK-LABEL: @test_vst1q_p_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) @@ -696,9 +696,9 @@ void test_vst1q_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) @@ -712,9 +712,9 @@ void test_vst1q_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) @@ -728,9 +728,9 @@ void test_vst1q_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) @@ -744,9 +744,9 @@ void test_vst1q_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) @@ -760,9 +760,9 @@ void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) @@ -776,9 +776,9 @@ void test_vst1q_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p) @@ -792,9 +792,9 @@ void test_vst1q_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p) @@ -896,9 +896,9 @@ void test_vstrbq_u32(uint8_t *base, uint32x4_t value) // CHECK-LABEL: @test_vstrbq_p_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrbq_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) @@ -913,9 +913,9 @@ void test_vstrbq_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_s16(int8_t *base, int16x8_t value, mve_pred16_t p) @@ -930,9 +930,9 @@ void test_vstrbq_p_s16(int8_t *base, int16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_s32(int8_t *base, int32x4_t value, mve_pred16_t p) @@ -946,9 +946,9 @@ void test_vstrbq_p_s32(int8_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrbq_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) @@ -963,9 +963,9 @@ void test_vstrbq_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_u16(uint8_t *base, uint16x8_t value, mve_pred16_t p) @@ -980,9 +980,9 @@ void test_vstrbq_p_u16(uint8_t *base, uint16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_u32(uint8_t *base, uint32x4_t value, mve_pred16_t p) @@ -1068,9 +1068,9 @@ void test_vstrhq_u32(uint16_t *base, uint32x4_t value) // CHECK-LABEL: @test_vstrhq_p_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrhq_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) @@ -1084,9 +1084,9 @@ void test_vstrhq_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrhq_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) @@ -1101,9 +1101,9 @@ void test_vstrhq_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrhq_p_s32(int16_t *base, int32x4_t value, mve_pred16_t p) @@ -1117,9 +1117,9 @@ void test_vstrhq_p_s32(int16_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrhq_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p) @@ -1134,9 +1134,9 @@ void test_vstrhq_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrhq_p_u32(uint16_t *base, uint32x4_t value, mve_pred16_t p) @@ -1192,9 +1192,9 @@ void test_vstrwq_u32(uint32_t *base, uint32x4_t value) // CHECK-LABEL: @test_vstrwq_p_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrwq_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) @@ -1208,9 +1208,9 @@ void test_vstrwq_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrwq_p_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrwq_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) @@ -1224,9 +1224,9 @@ void test_vstrwq_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrwq_p_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrwq_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p) |