Diffstat (limited to 'clang/test/CodeGen/arm-mve-intrinsics/load-store.c')
-rw-r--r--  clang/test/CodeGen/arm-mve-intrinsics/load-store.c  420
1 file changed, 210 insertions, 210 deletions
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c
index 2dde75f..50f7011 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c
@@ -8,8 +8,8 @@
// CHECK-LABEL: @test_vld1q_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: ret <8 x half> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vld1q_f16(const float16_t *base)
{
@@ -22,8 +22,8 @@ float16x8_t test_vld1q_f16(const float16_t *base)
// CHECK-LABEL: @test_vld1q_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4
-// CHECK-NEXT: ret <4 x float> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vld1q_f32(const float32_t *base)
{
@@ -36,8 +36,8 @@ float32x4_t test_vld1q_f32(const float32_t *base)
// CHECK-LABEL: @test_vld1q_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
int8x16_t test_vld1q_s8(const int8_t *base)
{
@@ -50,8 +50,8 @@ int8x16_t test_vld1q_s8(const int8_t *base)
// CHECK-LABEL: @test_vld1q_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vld1q_s16(const int16_t *base)
{
@@ -64,8 +64,8 @@ int16x8_t test_vld1q_s16(const int16_t *base)
// CHECK-LABEL: @test_vld1q_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vld1q_s32(const int32_t *base)
{
@@ -78,8 +78,8 @@ int32x4_t test_vld1q_s32(const int32_t *base)
// CHECK-LABEL: @test_vld1q_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vld1q_u8(const uint8_t *base)
{
@@ -92,8 +92,8 @@ uint8x16_t test_vld1q_u8(const uint8_t *base)
// CHECK-LABEL: @test_vld1q_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
uint16x8_t test_vld1q_u16(const uint16_t *base)
{
@@ -106,8 +106,8 @@ uint16x8_t test_vld1q_u16(const uint16_t *base)
// CHECK-LABEL: @test_vld1q_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vld1q_u32(const uint32_t *base)
{
@@ -120,10 +120,10 @@ uint32x4_t test_vld1q_u32(const uint32_t *base)
// CHECK-LABEL: @test_vld1q_z_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer)
-// CHECK-NEXT: ret <8 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x half> zeroinitializer)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vld1q_z_f16(const float16_t *base, mve_pred16_t p)
{
@@ -136,10 +136,10 @@ float16x8_t test_vld1q_z_f16(const float16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vld1q_z_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer)
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x float> zeroinitializer)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vld1q_z_f32(const float32_t *base, mve_pred16_t p)
{
@@ -152,10 +152,10 @@ float32x4_t test_vld1q_z_f32(const float32_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vld1q_z_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
-// CHECK-NEXT: ret <16 x i8> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vld1q_z_s8(const int8_t *base, mve_pred16_t p)
{
@@ -168,10 +168,10 @@ int8x16_t test_vld1q_z_s8(const int8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vld1q_z_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
-// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vld1q_z_s16(const int16_t *base, mve_pred16_t p)
{
@@ -184,10 +184,10 @@ int16x8_t test_vld1q_z_s16(const int16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vld1q_z_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vld1q_z_s32(const int32_t *base, mve_pred16_t p)
{
@@ -200,10 +200,10 @@ int32x4_t test_vld1q_z_s32(const int32_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vld1q_z_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
-// CHECK-NEXT: ret <16 x i8> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
uint8x16_t test_vld1q_z_u8(const uint8_t *base, mve_pred16_t p)
{
@@ -216,10 +216,10 @@ uint8x16_t test_vld1q_z_u8(const uint8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vld1q_z_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
-// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vld1q_z_u16(const uint16_t *base, mve_pred16_t p)
{
@@ -232,10 +232,10 @@ uint16x8_t test_vld1q_z_u16(const uint16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vld1q_z_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vld1q_z_u32(const uint32_t *base, mve_pred16_t p)
{
@@ -248,8 +248,8 @@ uint32x4_t test_vld1q_z_u32(const uint32_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrbq_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
int8x16_t test_vldrbq_s8(const int8_t *base)
{
@@ -258,9 +258,9 @@ int8x16_t test_vldrbq_s8(const int8_t *base)
// CHECK-LABEL: @test_vldrbq_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
int16x8_t test_vldrbq_s16(const int8_t *base)
{
@@ -269,9 +269,9 @@ int16x8_t test_vldrbq_s16(const int8_t *base)
// CHECK-LABEL: @test_vldrbq_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
int32x4_t test_vldrbq_s32(const int8_t *base)
{
@@ -280,8 +280,8 @@ int32x4_t test_vldrbq_s32(const int8_t *base)
// CHECK-LABEL: @test_vldrbq_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: ret <16 x i8> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vldrbq_u8(const uint8_t *base)
{
@@ -290,9 +290,9 @@ uint8x16_t test_vldrbq_u8(const uint8_t *base)
// CHECK-LABEL: @test_vldrbq_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
uint16x8_t test_vldrbq_u16(const uint8_t *base)
{
@@ -301,9 +301,9 @@ uint16x8_t test_vldrbq_u16(const uint8_t *base)
// CHECK-LABEL: @test_vldrbq_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1
-// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
uint32x4_t test_vldrbq_u32(const uint8_t *base)
{
@@ -312,10 +312,10 @@ uint32x4_t test_vldrbq_u32(const uint8_t *base)
// CHECK-LABEL: @test_vldrbq_z_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
-// CHECK-NEXT: ret <16 x i8> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vldrbq_z_s8(const int8_t *base, mve_pred16_t p)
{
@@ -324,11 +324,11 @@ int8x16_t test_vldrbq_z_s8(const int8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrbq_z_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer)
-// CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[TMP3]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
int16x8_t test_vldrbq_z_s16(const int8_t *base, mve_pred16_t p)
{
@@ -337,11 +337,11 @@ int16x8_t test_vldrbq_z_s16(const int8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrbq_z_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer)
-// CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
int32x4_t test_vldrbq_z_s32(const int8_t *base, mve_pred16_t p)
{
@@ -350,10 +350,10 @@ int32x4_t test_vldrbq_z_s32(const int8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrbq_z_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer)
-// CHECK-NEXT: ret <16 x i8> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
uint8x16_t test_vldrbq_z_u8(const uint8_t *base, mve_pred16_t p)
{
@@ -362,11 +362,11 @@ uint8x16_t test_vldrbq_z_u8(const uint8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrbq_z_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer)
-// CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vldrbq_z_u16(const uint8_t *base, mve_pred16_t p)
{
@@ -375,11 +375,11 @@ uint16x8_t test_vldrbq_z_u16(const uint8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrbq_z_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer)
-// CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i8> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vldrbq_z_u32(const uint8_t *base, mve_pred16_t p)
{
@@ -388,8 +388,8 @@ uint32x4_t test_vldrbq_z_u32(const uint8_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrhq_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: ret <8 x half> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vldrhq_f16(const float16_t *base)
{
@@ -398,8 +398,8 @@ float16x8_t test_vldrhq_f16(const float16_t *base)
// CHECK-LABEL: @test_vldrhq_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vldrhq_s16(const int16_t *base)
{
@@ -408,9 +408,9 @@ int16x8_t test_vldrhq_s16(const int16_t *base)
// CHECK-LABEL: @test_vldrhq_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
int32x4_t test_vldrhq_s32(const int16_t *base)
{
@@ -419,8 +419,8 @@ int32x4_t test_vldrhq_s32(const int16_t *base)
// CHECK-LABEL: @test_vldrhq_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: ret <8 x i16> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
uint16x8_t test_vldrhq_u16(const uint16_t *base)
{
@@ -429,9 +429,9 @@ uint16x8_t test_vldrhq_u16(const uint16_t *base)
// CHECK-LABEL: @test_vldrhq_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
uint32x4_t test_vldrhq_u32(const uint16_t *base)
{
@@ -440,10 +440,10 @@ uint32x4_t test_vldrhq_u32(const uint16_t *base)
// CHECK-LABEL: @test_vldrhq_z_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer)
-// CHECK-NEXT: ret <8 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x half> zeroinitializer)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vldrhq_z_f16(const float16_t *base, mve_pred16_t p)
{
@@ -452,10 +452,10 @@ float16x8_t test_vldrhq_z_f16(const float16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrhq_z_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
-// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vldrhq_z_s16(const int16_t *base, mve_pred16_t p)
{
@@ -464,11 +464,11 @@ int16x8_t test_vldrhq_z_s16(const int16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrhq_z_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer)
-// CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP1]], <4 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
int32x4_t test_vldrhq_z_s32(const int16_t *base, mve_pred16_t p)
{
@@ -477,10 +477,10 @@ int32x4_t test_vldrhq_z_s32(const int16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrhq_z_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer)
-// CHECK-NEXT: ret <8 x i16> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vldrhq_z_u16(const uint16_t *base, mve_pred16_t p)
{
@@ -489,11 +489,11 @@ uint16x8_t test_vldrhq_z_u16(const uint16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrhq_z_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer)
-// CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP4]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP1]], <4 x i16> zeroinitializer)
+// CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vldrhq_z_u32(const uint16_t *base, mve_pred16_t p)
{
@@ -502,8 +502,8 @@ uint32x4_t test_vldrhq_z_u32(const uint16_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrwq_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4
-// CHECK-NEXT: ret <4 x float> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4
+// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vldrwq_f32(const float32_t *base)
{
@@ -512,8 +512,8 @@ float32x4_t test_vldrwq_f32(const float32_t *base)
// CHECK-LABEL: @test_vldrwq_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vldrwq_s32(const int32_t *base)
{
@@ -522,8 +522,8 @@ int32x4_t test_vldrwq_s32(const int32_t *base)
// CHECK-LABEL: @test_vldrwq_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
-// CHECK-NEXT: ret <4 x i32> [[TMP1]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vldrwq_u32(const uint32_t *base)
{
@@ -532,10 +532,10 @@ uint32x4_t test_vldrwq_u32(const uint32_t *base)
// CHECK-LABEL: @test_vldrwq_z_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer)
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x float> zeroinitializer)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vldrwq_z_f32(const float32_t *base, mve_pred16_t p)
{
@@ -544,10 +544,10 @@ float32x4_t test_vldrwq_z_f32(const float32_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrwq_z_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vldrwq_z_s32(const int32_t *base, mve_pred16_t p)
{
@@ -556,10 +556,10 @@ int32x4_t test_vldrwq_z_s32(const int32_t *base, mve_pred16_t p)
// CHECK-LABEL: @test_vldrwq_z_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer)
-// CHECK-NEXT: ret <4 x i32> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vldrwq_z_u32(const uint32_t *base, mve_pred16_t p)
{
@@ -680,9 +680,9 @@ void test_vst1q_u32(uint32_t *base, uint32x4_t value)
// CHECK-LABEL: @test_vst1q_p_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p)
@@ -696,9 +696,9 @@ void test_vst1q_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vst1q_p_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p)
@@ -712,9 +712,9 @@ void test_vst1q_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vst1q_p_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p)
@@ -728,9 +728,9 @@ void test_vst1q_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vst1q_p_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p)
@@ -744,9 +744,9 @@ void test_vst1q_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vst1q_p_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p)
@@ -760,9 +760,9 @@ void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vst1q_p_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p)
@@ -776,9 +776,9 @@ void test_vst1q_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vst1q_p_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p)
@@ -792,9 +792,9 @@ void test_vst1q_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vst1q_p_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vst1q_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p)
@@ -896,9 +896,9 @@ void test_vstrbq_u32(uint8_t *base, uint32x4_t value)
// CHECK-LABEL: @test_vstrbq_p_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrbq_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p)
@@ -913,9 +913,9 @@ void test_vstrbq_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrbq_p_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP3]])
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_vstrbq_p_s16(int8_t *base, int16x8_t value, mve_pred16_t p)
@@ -930,9 +930,9 @@ void test_vstrbq_p_s16(int8_t *base, int16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrbq_p_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP3]])
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_vstrbq_p_s32(int8_t *base, int32x4_t value, mve_pred16_t p)
@@ -946,9 +946,9 @@ void test_vstrbq_p_s32(int8_t *base, int32x4_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrbq_p_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrbq_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p)
@@ -963,9 +963,9 @@ void test_vstrbq_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrbq_p_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]])
-// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP3]])
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
+// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_vstrbq_p_u16(uint8_t *base, uint16x8_t value, mve_pred16_t p)
@@ -980,9 +980,9 @@ void test_vstrbq_p_u16(uint8_t *base, uint16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrbq_p_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP3]])
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_vstrbq_p_u32(uint8_t *base, uint32x4_t value, mve_pred16_t p)
@@ -1068,9 +1068,9 @@ void test_vstrhq_u32(uint16_t *base, uint32x4_t value)
// CHECK-LABEL: @test_vstrhq_p_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrhq_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p)
@@ -1084,9 +1084,9 @@ void test_vstrhq_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrhq_p_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrhq_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p)
@@ -1101,9 +1101,9 @@ void test_vstrhq_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrhq_p_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP3]])
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_vstrhq_p_s32(int16_t *base, int32x4_t value, mve_pred16_t p)
@@ -1117,9 +1117,9 @@ void test_vstrhq_p_s32(int16_t *base, int32x4_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrhq_p_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrhq_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p)
@@ -1134,9 +1134,9 @@ void test_vstrhq_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrhq_p_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP3]])
+// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]])
// CHECK-NEXT: ret void
//
void test_vstrhq_p_u32(uint16_t *base, uint32x4_t value, mve_pred16_t p)
@@ -1192,9 +1192,9 @@ void test_vstrwq_u32(uint32_t *base, uint32x4_t value)
// CHECK-LABEL: @test_vstrwq_p_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrwq_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p)
@@ -1208,9 +1208,9 @@ void test_vstrwq_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrwq_p_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrwq_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p)
@@ -1224,9 +1224,9 @@ void test_vstrwq_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p)
// CHECK-LABEL: @test_vstrwq_p_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]])
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: ret void
//
void test_vstrwq_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p)