96 files changed, 8958 insertions, 5624 deletions
diff --git a/llvm/test/Analysis/BasicAA/modref.ll b/llvm/test/Analysis/BasicAA/modref.ll
index 1aab28f3..4a91fee 100644
--- a/llvm/test/Analysis/BasicAA/modref.ll
+++ b/llvm/test/Analysis/BasicAA/modref.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=gvn,dse -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 
 declare void @external(ptr)
 
@@ -71,7 +71,7 @@ define void @test3(i8 %X) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[P:%.*]] = alloca i64, align 8
 ; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1, ptr [[P]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[P]])
 ; CHECK-NEXT:    store i8 2, ptr [[P2]], align 1
 ; CHECK-NEXT:    call void @external(ptr [[P]])
 ; CHECK-NEXT:    ret void
@@ -81,7 +81,7 @@ define void @test3(i8 %X) {
 
   %P2 = getelementptr i8, ptr %P, i32 2
   store i8 %Y, ptr %P2  ;; Not read by lifetime.end, should be removed.
-  call void @llvm.lifetime.end.p0(i64 1, ptr %P)
+  call void @llvm.lifetime.end.p0(ptr %P)
   store i8 2, ptr %P2
   call void @external(ptr %P)
   ret void
@@ -90,7 +90,7 @@ define void @test3(i8 %X) {
 define void @test3a(i8 %X) {
 ; CHECK-LABEL: @test3a(
 ; CHECK-NEXT:    [[P:%.*]] = alloca i64, align 8
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 10, ptr [[P]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[P]])
 ; CHECK-NEXT:    ret void
 ;
   %P = alloca i64
@@ -98,7 +98,7 @@ define void @test3a(i8 %X) {
 
   %P2 = getelementptr i8, ptr %P, i32 2
   store i8 %Y, ptr %P2
-  call void @llvm.lifetime.end.p0(i64 10, ptr %P)
+  call void @llvm.lifetime.end.p0(ptr %P)
   ret void
 }
 
diff --git a/llvm/test/Analysis/BasicAA/phi-values-usage.ll b/llvm/test/Analysis/BasicAA/phi-values-usage.ll
index 43df41c..680e1df 100644
--- a/llvm/test/Analysis/BasicAA/phi-values-usage.ll
+++ b/llvm/test/Analysis/BasicAA/phi-values-usage.ll
@@ -14,7 +14,7 @@ target datalayout = "p:8:8-n8"
 
 declare void @otherfn(ptr)
 declare i32 @__gxx_personality_v0(...)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 @c = external global ptr, align 1
 
 ; This function is one where if we didn't free basicaa after memcpyopt then the
@@ -65,7 +65,7 @@ for.body:                                         ; preds = %for.cond
   br label %for.cond
 
 for.cond.cleanup:                                 ; preds = %for.cond
-  call void @llvm.lifetime.end.p0(i64 1, ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   %1 = load ptr, ptr %d.0, align 1
   store ptr %1, ptr @c, align 1
   ret void
diff --git a/llvm/test/Analysis/CallGraph/ignore-assumelike-calls.ll b/llvm/test/Analysis/CallGraph/ignore-assumelike-calls.ll
index 1c9d201..b93a2a0 100644
--- a/llvm/test/Analysis/CallGraph/ignore-assumelike-calls.ll
+++ b/llvm/test/Analysis/CallGraph/ignore-assumelike-calls.ll
@@ -29,7 +29,7 @@
 define internal void @used_by_lifetime() {
 entry:
   %a = alloca i8
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
   ret void
 }
 
@@ -55,6 +55,6 @@ define internal void @other_cast_intrinsic_use() {
   ret void
 }
 
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.start.p0(ptr nocapture)
 declare void @llvm.memset.p0.i64(ptr, i8, i64, i1 immarg)
 declare void @llvm.memset.p1.i64(ptr addrspace(1), i8, i64, i1 immarg)
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index de1b39d..c208d03 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -34,10 +34,10 @@ define void @fadd() {
 
 define void @fadd_fp16() {
 ; CHECK-BASE-LABEL: 'fadd_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fadd half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fadd half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fadd <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fadd <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fadd <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fadd_fp16'
@@ -84,10 +84,10 @@ define void @fsub() {
 
 define void @fsub_fp16() {
 ; CHECK-BASE-LABEL: 'fsub_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fsub <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fsub_fp16'
@@ -134,9 +134,9 @@ define void @fneg_idiom() {
 
 define void @fneg_idiom_fp16() {
 ; CHECK-BASE-LABEL: 'fneg_idiom_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half 0xH8000, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fsub half 0xH8000, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fneg_idiom_fp16'
@@ -180,21 +180,13 @@ define void @fneg() {
 }
 
 define void @fneg_fp16() {
-; CHECK-BASE-LABEL: 'fneg_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
-;
-; CHECK-FP16-LABEL: 'fneg_fp16'
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-LABEL: 'fneg_fp16'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fneg half undef
   %V2F16 = fneg <2 x half> undef
@@ -252,16 +244,16 @@ define void @fmulfneg() {
 
 define void @fmulneg_fp16() {
 ; CHECK-BASE-LABEL: 'fmulneg_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half %F16, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> %V2F16, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> %V4F16, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> %V8F16, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> %V16F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half %F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> %V2F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> %V4F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> %V8F16, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> %V16F16, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fmulneg_fp16'
@@ -338,16 +330,16 @@ define void @fnegfmul() {
 
 define void @fnegfmul_fp16() {
 ; CHECK-BASE-LABEL: 'fnegfmul_fp16'
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half %F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> %V2F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> %V4F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> %V8F16M
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> %V16F16M
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16M = fmul half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fneg half %F16M
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16M = fmul <2 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V2F16 = fneg <2 x half> %V2F16M
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16M = fmul <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fneg <4 x half> %V4F16M
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16M = fmul <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fneg <8 x half> %V8F16M
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16M = fmul <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fneg <16 x half> %V16F16M
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fnegfmul_fp16'
@@ -405,12 +397,19 @@ define void @fmul() {
 }
 
 define void @fmul_fp16() {
-; CHECK-LABEL: 'fmul_fp16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fmul_fp16'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fmul_fp16'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = fmul half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = fmul <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = fmul <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = fmul <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fmul half undef, undef
   %V4F16 = fmul <4 x half> undef, undef
@@ -448,12 +447,19 @@ define void @fdiv() {
 }
 
 define void @fdiv_fp16() {
-; CHECK-LABEL: 'fdiv_fp16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16F16 = fdiv <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fdiv_fp16'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = fdiv <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'fdiv_fp16'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = fdiv half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = fdiv <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = fdiv <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 4 for: %V16F16 = fdiv <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = fdiv half undef, undef
   %V4F16 = fdiv <4 x half> undef, undef
@@ -491,12 +497,19 @@ define void @frem() {
 }
 
 define void @frem_fp16() {
-; CHECK-LABEL: 'frem_fp16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:216 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'frem_fp16'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:55 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:220 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-FP16-LABEL: 'frem_fp16'
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %F16 = frem half undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:4 Lat:4 SizeLat:4 for: %V4F16 = frem <4 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:4 Lat:4 SizeLat:4 for: %V8F16 = frem <8 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:216 CodeSize:4 Lat:4 SizeLat:4 for: %V16F16 = frem <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = frem half undef, undef
   %V4F16 = frem <4 x half> undef, undef
@@ -650,13 +663,13 @@ define void @fcopysign_fp16() {
 
 define void @fma() {
 ; CHECK-LABEL: 'fma'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F128 = call fp128 @llvm.fma.f128(fp128 undef, fp128 undef, fp128 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %V2F128 = call <2 x fp128> @llvm.fma.v2f128(<2 x fp128> undef, <2 x fp128> undef, <2 x fp128> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
@@ -685,10 +698,10 @@ define void @fma_fp16() {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fma_fp16'
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 4 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
@@ -700,13 +713,13 @@ define void @fma_fp16() {
 
 define void @fmuladd() {
 ; CHECK-LABEL: 'fmuladd'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %F32 = call float @llvm.fmuladd.f32(float undef, float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2F32 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4F32 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8F32 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %F64 = call double @llvm.fmuladd.f64(double undef, double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2F64 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4F64 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F32 = call float @llvm.fmuladd.f32(float undef, float undef, float undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F64 = call double @llvm.fmuladd.f64(double undef, double undef, double undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:6 SizeLat:2 for: %F128 = call fp128 @llvm.fmuladd.f128(fp128 undef, fp128 undef, fp128 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:2 Lat:6 SizeLat:2 for: %V2F128 = call <2 x fp128> @llvm.fmuladd.v2f128(<2 x fp128> undef, <2 x fp128> undef, <2 x fp128> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
@@ -729,16 +742,16 @@ define void @fmuladd() {
 define void @fmuladd_fp16() {
 ; CHECK-BASE-LABEL: 'fmuladd_fp16'
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %F16 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:2 Lat:6 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:2 Lat:6 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:2 Lat:6 SizeLat:2 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fmuladd_fp16'
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %F16 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 4 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %F16 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %F16 = call half @llvm.fmuladd.f32(half undef, half undef, half undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
index 254715a..0d1f20b 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s  --check-prefixes=COMMON,BASE
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=COMMON,SVE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -31,60 +32,128 @@ declare <32 x i8>  @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @add(i32 %arg) {
-; CHECK-LABEL: 'add'
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; COMMON-LABEL: 'add'
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 poison, i64 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 poison, i32 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> poison, <3 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 poison, i16 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 poison, i8 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
 ;
-  %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-  %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-  %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-  %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-
-  %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-  %V2I32  = call <2 x i32>  @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-  %V4I32  = call <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-  %V8I32  = call <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-  %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-  %V3I32  = call <3 x i32>  @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
-
-  %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-  %V2I16  = call <2 x i16>  @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-  %V4I16  = call <4 x i16>  @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-  %V8I16  = call <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-  %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-
-  %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-  %V2I8  = call <2 x i8>  @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-  %V4I8  = call <4 x i8>  @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-  %V8I8  = call <8 x i8>  @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-  %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-  %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-
-  ret i32 undef
+  %I64 = call i64 @llvm.sadd.sat.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %I32 = call i32 @llvm.sadd.sat.i32(i32 poison, i32 poison)
+  %V2I32  = call <2 x i32>  @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+  %V3I32  = call <3 x i32>  @llvm.sadd.sat.v3i32(<3 x i32> poison, <3 x i32> poison)
+
+  %I16 = call i16 @llvm.sadd.sat.i16(i16 poison, i16 poison)
+  %V2I16  = call <2 x i16>  @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %I8 = call i8 @llvm.sadd.sat.i8(i8 poison, i8 poison)
+  %V2I8  = call <2 x i8>  @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
+}
+
+define i32 @add_sve_vscale2(i32 %arg) vscale_range(2,2) {
+; BASE-LABEL: 'add_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'add_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %V2I32  = call <2 x i32>  @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %V2I16  = call <2 x i16>  @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %V2I8  = call <2 x i8>  @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
 }
 
 declare i64        @llvm.ssub.sat.i64(i64, i64)
@@ -114,56 +183,124 @@ declare <32 x i8>  @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @sub(i32 %arg) {
-; CHECK-LABEL: 'sub'
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; COMMON-LABEL: 'sub'
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 poison, i64 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 poison, i32 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 poison, i16 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 poison, i8 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
 ;
-  %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-  %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-  %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-  %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-
-  %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-  %V2I32  = call <2 x i32>  @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-  %V4I32  = call <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-  %V8I32  = call <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-  %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-
-  %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-  %V2I16  = call <2 x i16>  @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-  %V4I16  = call <4 x i16>  @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-  %V8I16  = call <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-  %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-
-  %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-  %V2I8  = call <2 x i8>  @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-  %V4I8  = call <4 x i8>  @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-  %V8I8  = call <8 x i8>  @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-  %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-  %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-
-  ret i32 undef
+  %I64 = call i64 @llvm.ssub.sat.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %I32 = call i32 @llvm.ssub.sat.i32(i32 poison, i32 poison)
+  %V2I32  = call <2 x i32>  @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %I16 = call i16 @llvm.ssub.sat.i16(i16 poison, i16 poison)
+  %V2I16  = call <2 x i16>  @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %I8 = call i8 @llvm.ssub.sat.i8(i8 poison, i8 poison)
+  %V2I8  = call <2 x i8>  @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
+}
+
+define i32 @sub_sve_vscale2(i32 %arg) vscale_range(2,2) {
+; BASE-LABEL: 'sub_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'sub_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %V2I32  = call <2 x i32>  @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %V2I16  = call <2 x i16>  @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %V2I8  = call <2 x i8>  @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll
index dba42b1..032a448 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=COMMON,BASE
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=COMMON,SVE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -30,58 +31,126 @@ declare <32 x i8>  @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @add(i32 %arg) {
-; CHECK-LABEL: 'add'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; COMMON-LABEL: 'add'
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 poison, i64 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 poison, i32 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 poison, i16 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 poison, i8 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
 ;
-  %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-  %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-  %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-  %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-
-  %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-  %V2I32  = call <2 x i32>  @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-  %V4I32  = call <4 x i32>  @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-  %V8I32  = call <8 x i32>  @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-  %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-
-  %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-  %V2I16  = call <2 x i16>  @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-  %V4I16  = call <4 x i16>  @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-  %V8I16  = call <8 x i16>  @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-  %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-
-  %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-  %V2I8  = call <2 x i8>  @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-  %V4I8  = call <4 x i8>  @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-  %V8I8  = call <8 x i8>  @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-  %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-  %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-
-  ret i32 undef
+  %I64 = call i64 @llvm.uadd.sat.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %I32 = call i32 @llvm.uadd.sat.i32(i32 poison, i32 poison)
+  %V2I32  = call <2 x i32>  @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %I16 = call i16 @llvm.uadd.sat.i16(i16 poison, i16 poison)
+  %V2I16  = call <2 x i16>  @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %I8 = call i8 @llvm.uadd.sat.i8(i8 poison, i8 poison)
+  %V2I8  = call <2 x i8>  @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
+}
+
+define i32 @add_sve_vscale2(i32 %arg) vscale_range(2,2) {
+; BASE-LABEL: 'add_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'add_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %V2I32  = call <2 x i32>  @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %V2I16  = call <2 x i16>  @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %V2I8  = call <2 x i8>  @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
 }
 
 declare i64        @llvm.usub.sat.i64(i64, i64)
@@ -111,56 +180,124 @@ declare <32 x i8>  @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @sub(i32 %arg) {
-; CHECK-LABEL: 'sub'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; COMMON-LABEL: 'sub'
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.usub.sat.i64(i64 poison, i64 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.usub.sat.i32(i32 poison, i32 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.usub.sat.i16(i16 poison, i16 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.usub.sat.i8(i8 poison, i8 poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; COMMON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
 ;
-  %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-  %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-  %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-  %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-
-  %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-  %V2I32  = call <2 x i32>  @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-  %V4I32  = call <4 x i32>  @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-  %V8I32  = call <8 x i32>  @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-  %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-
-  %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-  %V2I16  = call <2 x i16>  @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-  %V4I16  = call <4 x i16>  @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-  %V8I16  = call <8 x i16>  @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-  %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-  %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-
-  %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-  %V2I8  = call <2 x i8>  @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-  %V4I8  = call <4 x i8>  @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-  %V8I8  = call <8 x i8>  @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-  %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-  %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-  %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-
-  ret i32 undef
+  %I64 = call i64 @llvm.usub.sat.i64(i64 poison, i64 poison)
+  %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %I32 = call i32 @llvm.usub.sat.i32(i32 poison, i32 poison)
+  %V2I32  = call <2 x i32>  @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %I16 = call i16 @llvm.usub.sat.i16(i16 poison, i16 poison)
+  %V2I16  = call <2 x i16>  @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %I8 = call i8 @llvm.usub.sat.i8(i8 poison, i8 poison)
+  %V2I8  = call <2 x i8>  @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
+}
+
+define i32 @sub_sve_vscale2(i32 %arg) vscale_range(2,2) {
+; BASE-LABEL: 'sub_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'sub_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of 2 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %V2I32  = call <2 x i32>  @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %V2I16  = call <2 x i16>  @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %V2I8  = call <2 x i8>  @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/extract_float.ll b/llvm/test/Analysis/CostModel/AArch64/extract_float.ll
index d2b75faa..c214021 100644
--- a/llvm/test/Analysis/CostModel/AArch64/extract_float.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/extract_float.ll
@@ -11,6 +11,7 @@ define double @extract_case1(<2 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <2 x double> %a, i32 1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %a, i32 1
@@ -24,6 +25,7 @@ define double @extract_case2(<2 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %0 = extractelement <2 x double> %a, i32 1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %0
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 1
   %res = fmul double %1, %1
@@ -36,6 +38,7 @@ define double @extract_case3(<2 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %0
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 0
   %res = fmul double %1, %1
@@ -48,6 +51,7 @@ define double @extract_case4(<2 x double> %a, double %b) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 0
   %res = fmul double %1, %b
@@ -60,6 +64,7 @@ define double @extract_case5(<2 x double> %a, double %b) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 1
   %res = fmul double %1, %b
@@ -74,6 +79,7 @@ define double @extract_case6(<3 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <3 x double> %a, i32 1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <3 x double> %a, i32 0
   %2 = extractelement <3 x double> %a, i32 1
@@ -90,6 +96,7 @@ define double @extract_case7(<4 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <4 x double> %a, i32 2
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <4 x double> %a, i32 1
   %2 = extractelement <4 x double> %a, i32 2
@@ -108,6 +115,7 @@ define double @extract_case8(<2 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fmul double %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fmul double %3, %4
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %5
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %a, i32 1
@@ -129,6 +137,7 @@ define double @extract_case9(<2 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fmul double %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fmul double %3, %4
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %5
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %a, i32 1
@@ -148,6 +157,7 @@ define double @extract_case10(<4 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @foo(double %1)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = fmul double %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %2
+;
 entry:
   %1 = extractelement <4 x double> %a, i32 0
   %2 = extractelement <4 x double> %a, i32 1
@@ -161,7 +171,7 @@ define half @extract_case11(<2 x half> %a) {
 ; NOFP16-LABEL: 'extract_case11'
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x half> %a, i32 0
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x half> %a, i32 1
-; NOFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul half %0, %1
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res = fmul half %0, %1
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %res
 ;
 ; FULLFP16-LABEL: 'extract_case11'
@@ -169,6 +179,7 @@ define half @extract_case11(<2 x half> %a) {
 ; FULLFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <2 x half> %a, i32 1
 ; FULLFP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul half %0, %1
 ; FULLFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret half %res
+;
 entry:
   %1 = extractelement <2 x half> %a, i32 0
   %2 = extractelement <2 x half> %a, i32 1
@@ -183,6 +194,7 @@ define float @extract_case12(<2 x float> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <2 x float> %a, i32 1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = fmul float %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %res
+;
 entry:
   %1 = extractelement <2 x float> %a, i32 0
   %2 = extractelement <2 x float> %a, i32 1
@@ -198,6 +210,7 @@ define double @extract_case13(<2 x double> %a) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x double> %a, i32 1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = fadd double %0, %1
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %res
+;
 entry:
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %a, i32 1
diff --git a/llvm/test/Analysis/CostModel/AArch64/histograms.ll b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
new file mode 100644
index 0000000..c048958
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/histograms.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-SVE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2 | FileCheck %s --check-prefix=CHECK-SVE2
+
+define void @histograms() {
+; CHECK-NEON-LABEL: 'histograms'
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-LABEL: 'histograms'
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE2-LABEL: 'histograms'
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+; CHECK-SVE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr> poison, i64 1, <vscale x 2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr> poison, i32 1, <vscale x 4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr> poison, i16 1, <vscale x 8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr> poison, i8 1, <vscale x 16 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr> poison, i64 1, <2 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr> poison, i32 1, <4 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr> poison, i16 1, <8 x i1> poison)
+  call void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr> poison, i8 1, <16 x i1> poison)
+  ret void
+}
+
+declare void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr>, i8, <16 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.uadd.sat.v16p0.i8(<16 x ptr>, i8, <16 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.umax.v16p0.i8(<16 x ptr>, i8, <16 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv2p0.i64(<vscale x 2 x ptr>, i64, <vscale x 2 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv4p0.i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv8p0.i16(<vscale x 8 x ptr>, i16, <vscale x 8 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.nxv16p0.i8(<vscale x 16 x ptr>, i8, <vscale x 16 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v2p0.i64(<2 x ptr>, i64, <2 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v4p0.i32(<4 x ptr>, i32, <4 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v8p0.i16(<8 x ptr>, i16, <8 x i1>)
+declare void @llvm.experimental.vector.histogram.umin.v16p0.i8(<16 x ptr>, i8, <16 x i1>)
diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
index f565924..c4236d2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
@@ -26,10 +26,10 @@ define void @strict_fp_reductions() {
 
 define void @strict_fp_reductions_fp16() {
 ; CHECK-NOFP16-LABEL: 'strict_fp_reductions_fp16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'strict_fp_reductions_fp16'
@@ -40,10 +40,10 @@ define void @strict_fp_reductions_fp16() {
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'strict_fp_reductions_fp16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:5 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:23 Lat:46 SizeLat:30 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:46 Lat:92 SizeLat:60 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
@@ -55,15 +55,15 @@ define void @strict_fp_reductions_fp16() {
 
 define void @strict_fp_reductions_bf16() {
 ; CHECK-NOFP16-LABEL: 'strict_fp_reductions_bf16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'strict_fp_reductions_bf16'
-; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'strict_fp_reductions_bf16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:26 CodeSize:11 Lat:22 SizeLat:14 for: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4f8(bfloat 0.0, <4 x bfloat> undef)
@@ -117,16 +117,16 @@ define void @fast_fp_reductions() {
 
 define void @fast_fp_reductions_fp16() {
 ; CHECK-NOFP16-LABEL: 'fast_fp_reductions_fp16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'fast_fp_reductions_fp16'
@@ -143,16 +143,16 @@ define void @fast_fp_reductions_fp16() {
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'fast_fp_reductions_fp16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:6 SizeLat:4 for: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:27 Lat:33 SizeLat:27 for: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:44 Lat:52 SizeLat:44 for: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:203 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:248 CodeSize:35 Lat:41 SizeLat:35 for: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
@@ -175,15 +175,15 @@ define void @fast_fp_reductions_fp16() {
 
 define void @fast_fp_reductions_bf16() {
 ; CHECK-NOFP16-LABEL: 'fast_fp_reductions_bf16'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-F16-LABEL: 'fast_fp_reductions_bf16'
-; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 ; CHECK-F16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'fast_fp_reductions_bf16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:8 Lat:12 SizeLat:8 for: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4f8(bfloat -0.0, <4 x bfloat> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
index dc95eac..1c40354 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
@@ -164,3 +164,55 @@ define void @frem() {
 
   ret void
 }
+
+define void @fma() {
+; CHECK-LABEL: 'fma'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V4F16 = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  %V8F16 = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  %V16F16 = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+
+  %V2F32 = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  %V4F32 = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  %V8F32 = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+
+  %V2F64 = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  %V4F64 = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+
+  ret void
+}
+
+define void @fmuladd() {
+; CHECK-LABEL: 'fmuladd'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %V4F16 = call <vscale x 4 x half> @llvm.fmuladd.v4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  %V8F16 = call <vscale x 8 x half> @llvm.fmuladd.v8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  %V16F16 = call <vscale x 16 x half> @llvm.fmuladd.v16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+
+  %V2F32 = call <vscale x 2 x float> @llvm.fmuladd.v2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
+  %V4F32 = call <vscale x 4 x float> @llvm.fmuladd.v4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
+  %V8F32 = call <vscale x 8 x float> @llvm.fmuladd.v8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+
+  %V2F64 = call <vscale x 2 x double> @llvm.fmuladd.v2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
+  %V4F64 = call <vscale x 4 x double> @llvm.fmuladd.v4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 7e8d957..609a23b 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1277,15 +1277,15 @@ define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1>
 
 define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v2i64'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v2i64'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v2i64'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:10 SizeLat:10 for: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
@@ -1294,15 +1294,15 @@ define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) {
 
 define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v4i32'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v4i32'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v4i32'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:20 for: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
@@ -1311,15 +1311,15 @@ define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) {
 
 define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v8i16'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v8i16'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v8i16'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:40 SizeLat:40 for: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
@@ -1328,15 +1328,15 @@ define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) {
 
 define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
 ; CHECK-VSCALE-1-LABEL: 'histogram_v16i8'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'histogram_v16i8'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'histogram_v16i8'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:64 Lat:80 SizeLat:80 for: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   call void @llvm.experimental.vector.histogram.add.v16p0.i64(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
diff --git a/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll b/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll
index 6bcf3c7..f234341 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vec3-ops.ll
@@ -206,8 +206,8 @@ define void @vec3_float(<3 x float> %a, <3 x float> %b, ptr %src, ptr %dst) {
 define void @vec3_half(<3 x half> %a, <3 x half> %b, ptr %src, ptr %dst) {
 ; CHECK-LABEL: 'vec3_half'
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %l = load <3 x half>, ptr %src, align 1
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %add = fadd <3 x half> %l, %b
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %sub = fsub <3 x half> %add, %a
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %add = fadd <3 x half> %l, %b
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %sub = fsub <3 x half> %add, %a
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <3 x half> %sub, ptr %dst, align 1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll
index a0a0374..2a57c850 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 < %s  | FileCheck %s --check-prefix=CHECK-NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 < %s  | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll
index 090fcce..6b19be5 100644
--- a/llvm/test/Analysis/CostModel/ARM/cmps.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll
@@ -1,159 +1,83 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @cmps() {
-; CHECK-MVE-RECIP-LABEL: 'cmps'
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; CHECK-V8M-MAIN-RECIP-LABEL: 'cmps'
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; CHECK-V8M-BASE-RECIP-LABEL: 'cmps'
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; CHECK-V8R-RECIP-LABEL: 'cmps'
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; CHECK-MVE-SIZE-LABEL: 'cmps'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; CHECK-V8M-MAIN-SIZE-LABEL: 'cmps'
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; CHECK-V8M-BASE-SIZE-LABEL: 'cmps'
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; CHECK-V8R-SIZE-LABEL: 'cmps'
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-LABEL: 'cmps'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a = icmp slt i8 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %b = icmp ult i16 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c = icmp sge i32 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %d = icmp ne i64 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %e = icmp slt <16 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %f = icmp ult <8 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %g = icmp sge <4 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a7 = fcmp oge half undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a8 = fcmp ogt float undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %a9 = fcmp ogt double undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a10 = fcmp olt <8 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a11 = fcmp oge <4 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %p = icmp eq ptr undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %q = icmp eq <4 x ptr> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; CHECK-V8M-MAIN-LABEL: 'cmps'
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %a = icmp slt i8 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %b = icmp ult i16 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %c = icmp sge i32 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %d = icmp ne i64 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %e = icmp slt <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %f = icmp ult <8 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %g = icmp sge <4 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %a7 = fcmp oge half undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %a8 = fcmp ogt float undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %a9 = fcmp ogt double undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %a10 = fcmp olt <8 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %a11 = fcmp oge <4 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 6 for: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %p = icmp eq ptr undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %q = icmp eq <4 x ptr> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; CHECK-V8M-BASE-LABEL: 'cmps'
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %a = icmp slt i8 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %b = icmp ult i16 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %c = icmp sge i32 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %d = icmp ne i64 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %e = icmp slt <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %f = icmp ult <8 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %g = icmp sge <4 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %a7 = fcmp oge half undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %a8 = fcmp ogt float undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %a9 = fcmp ogt double undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %a10 = fcmp olt <8 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %a11 = fcmp oge <4 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 6 for: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %p = icmp eq ptr undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %q = icmp eq <4 x ptr> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; CHECK-V8R-LABEL: 'cmps'
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %a = icmp slt i8 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %b = icmp ult i16 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %c = icmp sge i32 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %d = icmp ne i64 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %e = icmp slt <16 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %f = icmp ult <8 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %g = icmp sge <4 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %a7 = fcmp oge half undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %a8 = fcmp ogt float undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %a9 = fcmp ogt double undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 24 for: %a10 = fcmp olt <8 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %a11 = fcmp oge <4 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %p = icmp eq ptr undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %q = icmp eq <4 x ptr> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %a = icmp slt i8 undef, undef
   %b = icmp ult i16 undef, undef
@@ -174,125 +98,65 @@ define i32 @cmps() {
 }
 
 define void @minmax() {
-; CHECK-MVE-RECIP-LABEL: 'minmax'
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-V8M-MAIN-RECIP-LABEL: 'minmax'
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-V8M-BASE-RECIP-LABEL: 'minmax'
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-V8R-RECIP-LABEL: 'minmax'
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-MVE-SIZE-LABEL: 'minmax'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-V8M-MAIN-SIZE-LABEL: 'minmax'
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-V8M-BASE-SIZE-LABEL: 'minmax'
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; CHECK-V8R-SIZE-LABEL: 'minmax'
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c1 = icmp slt i8 undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s1 = select i1 %c1, i8 undef, i8 undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = icmp slt i16 undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2 = select i1 %c2, i16 undef, i16 undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-LABEL: 'minmax'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c1 = icmp slt i8 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s1 = select i1 %c1, i8 undef, i8 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c2 = icmp slt i16 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s2 = select i1 %c2, i16 undef, i16 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c3 = icmp slt i32 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s3 = select i1 %c3, i32 undef, i32 undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %c4 = icmp slt <4 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %c5 = icmp slt ptr undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-V8M-MAIN-LABEL: 'minmax'
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %c1 = icmp slt i8 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s1 = select i1 %c1, i8 undef, i8 undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %c2 = icmp slt i16 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s2 = select i1 %c2, i16 undef, i16 undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %c3 = icmp slt i32 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s3 = select i1 %c3, i32 undef, i32 undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 0 for: %c4 = icmp slt <4 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %c5 = icmp slt ptr undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; CHECK-V8M-BASE-LABEL: 'minmax'
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %c1 = icmp slt i8 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s1 = select i1 %c1, i8 undef, i8 undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %c2 = icmp slt i16 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s2 = select i1 %c2, i16 undef, i16 undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %c3 = icmp slt i32 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s3 = select i1 %c3, i32 undef, i32 undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 0 for: %c4 = icmp slt <4 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:20 Lat:16 SizeLat:16 for: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %c5 = icmp slt ptr undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:2 Lat:1 SizeLat:1 for: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:12 Lat:8 SizeLat:8 for: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
+;
+; CHECK-V8R-LABEL: 'minmax'
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %c1 = icmp slt i8 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %s1 = select i1 %c1, i8 undef, i8 undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %c2 = icmp slt i16 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %s2 = select i1 %c2, i16 undef, i16 undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %c3 = icmp slt i32 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %s3 = select i1 %c3, i32 undef, i32 undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 0 for: %c4 = icmp slt <4 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %c5 = icmp slt ptr undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 1 for: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %c1 = icmp slt i8 undef, undef
   %s1 = select i1 %c1, i8 undef, i8 undef
diff --git a/llvm/test/Analysis/CostModel/ARM/control-flow.ll b/llvm/test/Analysis/CostModel/ARM/control-flow.ll
index dcf9028..af7426b 100644
--- a/llvm/test/Analysis/CostModel/ARM/control-flow.ll
+++ b/llvm/test/Analysis/CostModel/ARM/control-flow.ll
@@ -1,151 +1,55 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-SIZE
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-SIZE
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8.1m.main -mattr=+mve  | FileCheck %s --check-prefix=CHECK-MVE-SIZE
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size  -mtriple=armv8a -mattr=+neon  | FileCheck %s --check-prefix=CHECK-NEON-SIZE
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-LATENCY
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-LATENCY
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-LATENCY
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-THROUGHPUT
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-THROUGHPUT
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-MVE-THROUGHPUT
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=armv8a -mattr=+neon | FileCheck %s --check-prefix=CHECK-NEON-THROUGHPUT
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output  -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output  -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output  -mtriple=thumbv8.1m.main -mattr=+mve  | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output  -mtriple=armv8a -mattr=+neon  | FileCheck %s --check-prefix=CHECK-NEON
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @simple_loop_cost(i32 %N) {
-; CHECK-T1-SIZE-LABEL: 'simple_loop_cost'
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-SIZE-LABEL: 'simple_loop_cost'
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-MVE-SIZE-LABEL: 'simple_loop_cost'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-NEON-SIZE-LABEL: 'simple_loop_cost'
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-T1-LATENCY-LABEL: 'simple_loop_cost'
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-LATENCY-LABEL: 'simple_loop_cost'
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8_1M-LATENCY-LABEL: 'simple_loop_cost'
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-T1-THROUGHPUT-LABEL: 'simple_loop_cost'
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-THROUGHPUT-LABEL: 'simple_loop_cost'
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_loop_cost'
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-MVE-THROUGHPUT-LABEL: 'simple_loop_cost'
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
-;
-; CHECK-NEON-THROUGHPUT-LABEL: 'simple_loop_cost'
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+; CHECK-T1-LABEL: 'simple_loop_cost'
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br label %loop
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: ret i32 %res
+;
+; CHECK-V8M-LABEL: 'simple_loop_cost'
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br label %loop
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 %res
+;
+; CHECK-MVE-LABEL: 'simple_loop_cost'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br label %loop
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %res
+;
+; CHECK-NEON-LABEL: 'simple_loop_cost'
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br label %loop
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %res
 ;
 entry:
   %zero = icmp eq i32 %N, 0
@@ -166,197 +70,69 @@ exit:
 }
 
 define i32 @simple_mul_loop(ptr %A, ptr %B, i32 %N) {
-; CHECK-T1-SIZE-LABEL: 'simple_mul_loop'
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-SIZE-LABEL: 'simple_mul_loop'
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-MVE-SIZE-LABEL: 'simple_mul_loop'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-NEON-SIZE-LABEL: 'simple_mul_loop'
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-T1-LATENCY-LABEL: 'simple_mul_loop'
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-LATENCY-LABEL: 'simple_mul_loop'
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8_1M-LATENCY-LABEL: 'simple_mul_loop'
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-T1-THROUGHPUT-LABEL: 'simple_mul_loop'
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-THROUGHPUT-LABEL: 'simple_mul_loop'
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_mul_loop'
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_loop'
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
-;
-; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_loop'
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+; CHECK-T1-LABEL: 'simple_mul_loop'
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br label %loop
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i32, ptr %addr.a, align 4
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %load, %load
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: ret i32 %res
+;
+; CHECK-V8M-LABEL: 'simple_mul_loop'
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br label %loop
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 0 for: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i32, ptr %addr.a, align 4
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %load, %load
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 0 for: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 %res
+;
+; CHECK-MVE-LABEL: 'simple_mul_loop'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br label %loop
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i32, ptr %addr.a, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %load, %load
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %res
+;
+; CHECK-NEON-LABEL: 'simple_mul_loop'
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br label %loop
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i32, ptr %addr.a, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %load, %load
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %res
 ;
 entry:
   %zero = icmp eq i32 %N, 0
@@ -382,233 +158,81 @@ exit:
 }
 
 define i32 @simple_mul_ext_lsr_loop(ptr %A, ptr %B, i32 %N) {
-; CHECK-T1-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-MVE-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-NEON-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-T1-LATENCY-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-LATENCY-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8_1M-LATENCY-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-T1-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8M-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
-;
-; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
-;
-; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+; CHECK-T1-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br label %loop
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i16, ptr %addr.a, align 2
+; CHECK-T1-NEXT:  Cost Model: Found costs of 0 for: %sext = sext i16 %load to i32
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %sext, 7
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 0 for: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 0 for: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-T1-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-T1-NEXT:  Cost Model: Found costs of 1 for: ret i32 %res
+;
+; CHECK-V8M-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br label %loop
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i16, ptr %addr.a, align 2
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 0 for: %sext = sext i16 %load to i32
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %sext, 7
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 0 for: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 0 for: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-V8M-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:0 Lat:0 SizeLat:0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 %res
+;
+; CHECK-MVE-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br label %loop
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i16, ptr %addr.a, align 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %sext = sext i16 %load to i32
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %sext, 7
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %res
+;
+; CHECK-NEON-LABEL: 'simple_mul_ext_lsr_loop'
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br label %loop
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %load = load i16, ptr %addr.a, align 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %sext = sext i16 %load to i32
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %mul = mul i32 %sext, 7
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: store i32 %mul, ptr %addr.b, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 1 for: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 0 for: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %res
 ;
 entry:
   %zero = icmp eq i32 %N, 0
diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll
index 9f0c29c..76f80da 100644
--- a/llvm/test/Analysis/CostModel/ARM/divrem.ll
+++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll
@@ -1,67 +1,67 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK-NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 < %s | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8r-none-eabi -mattr=+neon,+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK-V8R
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @i8() {
 ; CHECK-NEON-LABEL: 'i8'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i8 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i8 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i8 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i8 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i8 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i8 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = sdiv i8 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = udiv i8 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i8 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i8 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %5 = sdiv i8 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %6 = udiv i8 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i8 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i8 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'i8'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i8 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i8 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i8 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i8 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i8 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i8 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i8 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i8 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i8 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i8 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i8 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i8 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i8'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i8 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i8 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i8 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i8 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i8 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i8 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i8 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i8 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i8 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i8 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i8 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i8 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i8'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i8 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i8 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i8 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i8 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i8 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i8 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i8 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i8 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i8 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i8 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i8 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i8 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'i8'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i8 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = srem i8 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = urem i8 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i8 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = srem i8 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = urem i8 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %1 = sdiv i8 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %2 = udiv i8 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i8 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i8 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %5 = sdiv i8 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %6 = udiv i8 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i8 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i8 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = sdiv i8 undef, undef
   %2 = udiv i8 undef, undef
@@ -76,59 +76,59 @@ define void @i8() {
 
 define void @i16() {
 ; CHECK-NEON-LABEL: 'i16'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i16 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i16 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i16 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i16 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i16 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i16 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = sdiv i16 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = udiv i16 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i16 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i16 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %5 = sdiv i16 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %6 = udiv i16 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i16 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i16 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'i16'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i16 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i16 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i16 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i16 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i16 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i16 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i16 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i16 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i16 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i16 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i16 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i16 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i16'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i16 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i16 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i16 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i16 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i16 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i16 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i16 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i16 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i16 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i16 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i16 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i16 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i16'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i16 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i16 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i16 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i16 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i16 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i16 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i16 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i16 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i16 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i16 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i16 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i16 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'i16'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i16 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = srem i16 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = urem i16 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i16 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = srem i16 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = urem i16 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %1 = sdiv i16 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %2 = udiv i16 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i16 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i16 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %5 = sdiv i16 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %6 = udiv i16 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i16 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i16 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = sdiv i16 undef, undef
   %2 = udiv i16 undef, undef
@@ -143,59 +143,59 @@ define void @i16() {
 
 define void @i32() {
 ; CHECK-NEON-LABEL: 'i32'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i32 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i32 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i32 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i32 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i32 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i32 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = sdiv i32 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = udiv i32 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i32 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i32 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %5 = sdiv i32 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %6 = udiv i32 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i32 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i32 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'i32'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i32 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i32 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i32 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i32 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i32 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i32 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i32 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i32 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i32 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i32 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i32 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i32 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i32'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i32 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i32 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i32 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i32 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i32 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i32 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i32 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i32 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i32 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i32 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i32 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i32 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i32'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i32 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i32 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i32 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i32 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %1 = sdiv i32 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %2 = udiv i32 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i32 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i32 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %5 = sdiv i32 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %6 = udiv i32 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i32 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i32 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'i32'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = udiv i32 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = srem i32 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = urem i32 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sdiv i32 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = srem i32 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = urem i32 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %1 = sdiv i32 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %2 = udiv i32 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i32 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i32 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %5 = sdiv i32 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %6 = udiv i32 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i32 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i32 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = sdiv i32 undef, undef
   %2 = udiv i32 undef, undef
@@ -210,59 +210,59 @@ define void @i32() {
 
 define void @i64() {
 ; CHECK-NEON-LABEL: 'i64'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = sdiv i64 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = udiv i64 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i64 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i64 undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sdiv i64 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = udiv i64 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %1 = sdiv i64 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %2 = udiv i64 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i64 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i64 undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %5 = sdiv i64 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %6 = udiv i64 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i64 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i64 undef, 2
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'i64'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i64 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i64 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i64 undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i64 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 2 for: %1 = sdiv i64 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 2 for: %2 = udiv i64 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i64 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i64 undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 2 for: %5 = sdiv i64 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 2 for: %6 = udiv i64 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i64 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i64 undef, 2
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'i64'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i64 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i64 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i64 undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i64 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %1 = sdiv i64 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %2 = udiv i64 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i64 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i64 undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %5 = sdiv i64 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %6 = udiv i64 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i64 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i64 undef, 2
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'i64'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i64 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = srem i64 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = urem i64 undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i64 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %1 = sdiv i64 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %2 = udiv i64 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %3 = srem i64 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %4 = urem i64 undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %5 = sdiv i64 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %6 = udiv i64 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %7 = srem i64 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %8 = urem i64 undef, 2
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'i64'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = udiv i64 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %3 = srem i64 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = urem i64 undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i64 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = srem i64 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = urem i64 undef, 2
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = sdiv i64 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = udiv i64 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %3 = srem i64 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %4 = urem i64 undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %5 = sdiv i64 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %6 = udiv i64 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %7 = srem i64 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %8 = urem i64 undef, 2
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = sdiv i64 undef, undef
   %2 = udiv i64 undef, undef
@@ -277,39 +277,39 @@ define void @i64() {
 
 define void @f16() {
 ; CHECK-NEON-LABEL: 'f16'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv half undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem half undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv half undef, 0xH4000
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem half undef, 0xH4000
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'f16'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %1 = fdiv half undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %2 = frem half undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %3 = fdiv half undef, 0xH4000
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %4 = frem half undef, 0xH4000
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'f16'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %1 = fdiv half undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %2 = frem half undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %3 = fdiv half undef, 0xH4000
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %4 = frem half undef, 0xH4000
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'f16'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %1 = fdiv half undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %2 = frem half undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %3 = fdiv half undef, 0xH4000
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %4 = frem half undef, 0xH4000
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'f16'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv half undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem half undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv half undef, 0xH4000
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem half undef, 0xH4000
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv half undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem half undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv half undef, 0xH4000
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem half undef, 0xH4000
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv half undef, undef
   %2 = frem half undef, undef
@@ -320,39 +320,39 @@ define void @f16() {
 
 define void @f32() {
 ; CHECK-NEON-LABEL: 'f32'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv float undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem float undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv float undef, 2.000000e+00
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem float undef, 2.000000e+00
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'f32'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %1 = fdiv float undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %2 = frem float undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %3 = fdiv float undef, 2.000000e+00
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %4 = frem float undef, 2.000000e+00
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'f32'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %1 = fdiv float undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %2 = frem float undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %3 = fdiv float undef, 2.000000e+00
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: %4 = frem float undef, 2.000000e+00
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'f32'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %1 = fdiv float undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %2 = frem float undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %3 = fdiv float undef, 2.000000e+00
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: %4 = frem float undef, 2.000000e+00
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'f32'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv float undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem float undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv float undef, 2.000000e+00
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem float undef, 2.000000e+00
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv float undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem float undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv float undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem float undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv float undef, undef
   %2 = frem float undef, undef
@@ -363,39 +363,39 @@ define void @f32() {
 
 define void @f64() {
 ; CHECK-NEON-LABEL: 'f64'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'f64'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv double undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = frem double undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv double undef, 2.000000e+00
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = frem double undef, 2.000000e+00
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %1 = fdiv double undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %2 = frem double undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %3 = fdiv double undef, 2.000000e+00
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 1 for: %4 = frem double undef, 2.000000e+00
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'f64'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv double undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %2 = frem double undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %3 = fdiv double undef, 2.000000e+00
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %4 = frem double undef, 2.000000e+00
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'f64'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv double undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %2 = frem double undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %3 = fdiv double undef, 2.000000e+00
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %4 = frem double undef, 2.000000e+00
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'f64'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv double undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem double undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv double undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem double undef, 2.000000e+00
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv double undef, undef
   %2 = frem double undef, undef
@@ -406,99 +406,99 @@ define void @f64() {
 
 define void @vi8() {
 ; CHECK-NEON-LABEL: 'vi8'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %e1 = sdiv <8 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %e2 = udiv <8 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i8> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi8'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t1 = sdiv <2 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t2 = udiv <2 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t3 = srem <2 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t4 = urem <2 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f1 = sdiv <4 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f2 = udiv <4 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f3 = srem <4 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f4 = urem <4 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e1 = sdiv <8 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e2 = udiv <8 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e3 = srem <8 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e4 = urem <8 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s1 = sdiv <16 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s2 = udiv <16 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s3 = srem <16 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s4 = urem <16 x i8> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %e1 = sdiv <8 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %e2 = udiv <8 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i8> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i8> undef, undef
   %t2 = udiv <2 x i8> undef, undef
@@ -521,99 +521,99 @@ define void @vi8() {
 
 define void @vi16() {
 ; CHECK-NEON-LABEL: 'vi16'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i16> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi16'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t1 = sdiv <2 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t2 = udiv <2 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t3 = srem <2 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t4 = urem <2 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f1 = sdiv <4 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f2 = udiv <4 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f3 = srem <4 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f4 = urem <4 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e1 = sdiv <8 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e2 = udiv <8 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e3 = srem <8 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e4 = urem <8 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s1 = sdiv <16 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s2 = udiv <16 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s3 = srem <16 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s4 = urem <16 x i16> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i16> undef, undef
   %t2 = udiv <2 x i16> undef, undef
@@ -636,99 +636,99 @@ define void @vi16() {
 
 define void @vi32() {
 ; CHECK-NEON-LABEL: 'vi32'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i32> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t1 = sdiv <2 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t2 = udiv <2 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t3 = srem <2 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t4 = urem <2 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f1 = sdiv <4 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f2 = udiv <4 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f3 = srem <4 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f4 = urem <4 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e1 = sdiv <8 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e2 = udiv <8 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e3 = srem <8 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e4 = urem <8 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s1 = sdiv <16 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s2 = udiv <16 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s3 = srem <16 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s4 = urem <16 x i32> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i32> undef, undef
   %t2 = udiv <2 x i32> undef, undef
@@ -751,99 +751,99 @@ define void @vi32() {
 
 define void @vi64() {
 ; CHECK-NEON-LABEL: 'vi64'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i64> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %t1 = sdiv <2 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %t2 = udiv <2 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 18 for: %t3 = srem <2 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 18 for: %t4 = urem <2 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %f1 = sdiv <4 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %f2 = udiv <4 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 36 for: %f3 = srem <4 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 36 for: %f4 = urem <4 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %e1 = sdiv <8 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %e2 = udiv <8 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 72 for: %e3 = srem <8 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 72 for: %e4 = urem <8 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 160 for: %s1 = sdiv <16 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 160 for: %s2 = udiv <16 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 144 for: %s3 = srem <16 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 144 for: %s4 = urem <16 x i64> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t1 = sdiv <2 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t2 = udiv <2 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 6 for: %t3 = srem <2 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 6 for: %t4 = urem <2 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f1 = sdiv <4 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f2 = udiv <4 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 12 for: %f3 = srem <4 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 12 for: %f4 = urem <4 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e1 = sdiv <8 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e2 = udiv <8 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 24 for: %e3 = srem <8 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 24 for: %e4 = urem <8 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s1 = sdiv <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s2 = udiv <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 48 for: %s3 = srem <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 48 for: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t1 = sdiv <2 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t2 = udiv <2 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 6 for: %t3 = srem <2 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 6 for: %t4 = urem <2 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f1 = sdiv <4 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f2 = udiv <4 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 12 for: %f3 = srem <4 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 12 for: %f4 = urem <4 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e1 = sdiv <8 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e2 = udiv <8 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 24 for: %e3 = srem <8 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 24 for: %e4 = urem <8 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s1 = sdiv <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s2 = udiv <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 48 for: %s3 = srem <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 48 for: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i64> undef, undef
   %t2 = udiv <2 x i64> undef, undef
@@ -866,49 +866,49 @@ define void @vi64() {
 
 define void @vf16() {
 ; CHECK-NEON-LABEL: 'vf16'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x half> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x half> undef, undef
   %2 = frem <2 x half> undef, undef
@@ -921,49 +921,49 @@ define void @vf16() {
 
 define void @vf32() {
 ; CHECK-NEON-LABEL: 'vf32'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x float> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x float> undef, undef
   %2 = frem <2 x float> undef, undef
@@ -976,49 +976,49 @@ define void @vf32() {
 
 define void @vf64() {
 ; CHECK-NEON-LABEL: 'vf64'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, undef
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, undef
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, undef
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x double> undef, undef
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, undef
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, undef
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x double> undef, undef
   %2 = frem <2 x double> undef, undef
@@ -1031,99 +1031,99 @@ define void @vf64() {
 
 define void @vi8_2() {
 ; CHECK-NEON-LABEL: 'vi8_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi8_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s4 = urem <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
   %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
@@ -1146,99 +1146,99 @@ define void @vi8_2() {
 
 define void @vi16_2() {
 ; CHECK-NEON-LABEL: 'vi16_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi16_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s4 = urem <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 10 for: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
   %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
@@ -1261,99 +1261,99 @@ define void @vi16_2() {
 
 define void @vi32_2() {
 ; CHECK-NEON-LABEL: 'vi32_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 10 for: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %s4 = urem <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s4 = urem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
   %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
@@ -1376,99 +1376,99 @@ define void @vi32_2() {
 
 define void @vi64_2() {
 ; CHECK-NEON-LABEL: 'vi64_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 20 for: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 18 for: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 18 for: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 40 for: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 36 for: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 36 for: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 80 for: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 72 for: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 72 for: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 160 for: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 160 for: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 144 for: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 144 for: %s4 = urem <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 6 for: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 6 for: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 12 for: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 12 for: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 24 for: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 24 for: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 32 for: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 48 for: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 48 for: %s4 = urem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 6 for: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 6 for: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 12 for: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 12 for: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 24 for: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 24 for: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 32 for: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 48 for: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 48 for: %s4 = urem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 40 for: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 80 for: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 160 for: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 320 for: %s4 = urem <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
   %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
@@ -1491,49 +1491,49 @@ define void @vi64_2() {
 
 define void @vf16_2() {
 ; CHECK-NEON-LABEL: 'vf16_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x half> undef, <half 2., half 2.>
   %2 = frem <2 x half> undef, <half 2., half 2.>
@@ -1546,49 +1546,49 @@ define void @vf16_2() {
 
 define void @vf32_2() {
 ; CHECK-NEON-LABEL: 'vf32_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 2 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x float> undef, <float 2., float 2.>
   %2 = frem <2 x float> undef, <float 2., float 2.>
@@ -1601,49 +1601,49 @@ define void @vf32_2() {
 
 define void @vf64_2() {
 ; CHECK-NEON-LABEL: 'vf64_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 8 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 16 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %1 = fdiv <2 x double> undef, <double 2., double 2.>
   %2 = frem <2 x double> undef, <double 2., double 2.>
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
index cc8f2da..254c191 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
@@ -31,55 +30,30 @@ declare <32 x i8>  @llvm.abs.v32i8(<32 x i8>, i1)
 declare <64 x i8>  @llvm.abs.v64i8(<64 x i8>, i1)
 
 define i32 @abs(i32 %arg) {
-; MVE-RECIP-LABEL: 'abs'
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'abs'
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'abs'
+; MVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
   %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
index 664b828..a12fd00 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
@@ -9,8 +9,8 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @v4i32(i32 %index, i32 %TC) {
 ; CHECK-LABEL: 'v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
   ret void
@@ -18,8 +18,8 @@ define void @v4i32(i32 %index, i32 %TC) {
 
 define void @v8i16(i32 %index, i32 %TC) {
 ; CHECK-LABEL: 'v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
   ret void
@@ -27,8 +27,8 @@ define void @v8i16(i32 %index, i32 %TC) {
 
 define void @v16i8(i32 %index, i32 %TC) {
 ; CHECK-LABEL: 'v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
   ret void
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
index fa1cf17..4e6ebbc 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
@@ -1,30 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @icmp() {
 ; CHECK-LABEL: 'icmp'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v2i8 = icmp slt <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = icmp slt <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = icmp slt <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = icmp slt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v2i16 = icmp slt <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = icmp slt <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = icmp slt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v2i32 = icmp slt <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = icmp slt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v2i128 = icmp slt <2 x i128> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i128 = icmp slt <4 x i128> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %v2i8 = icmp slt <2 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = icmp slt <4 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = icmp slt <8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = icmp slt <16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:130 Lat:132 SizeLat:132 for: %v32i8 = icmp slt <32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %v2i16 = icmp slt <2 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = icmp slt <4 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = icmp slt <8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:66 Lat:68 SizeLat:68 for: %v16i16 = icmp slt <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %v2i32 = icmp slt <2 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = icmp slt <4 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:34 Lat:36 SizeLat:36 for: %v8i32 = icmp slt <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:68 Lat:72 SizeLat:72 for: %v16i32 = icmp slt <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:18 Lat:36 SizeLat:36 for: %v2i64 = icmp slt <2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:36 Lat:72 SizeLat:72 for: %v4i64 = icmp slt <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:144 CodeSize:72 Lat:144 SizeLat:144 for: %v8i64 = icmp slt <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:34 Lat:68 SizeLat:68 for: %v2i128 = icmp slt <2 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:136 CodeSize:68 Lat:136 SizeLat:136 for: %v4i128 = icmp slt <4 x i128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2i8 = icmp slt <2 x i8> undef, undef
   %v4i8 = icmp slt <4 x i8> undef, undef
@@ -54,32 +54,32 @@ define void @icmp() {
 
 define void @fcmp() {
 ; CHECK-MVE-LABEL: 'fcmp'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 12 for: %v2f16 = fcmp olt <2 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 24 for: %v4f16 = fcmp olt <4 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 48 for: %v8f16 = fcmp olt <8 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 96 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 12 for: %v2f32 = fcmp olt <2 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 24 for: %v4f32 = fcmp olt <4 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 48 for: %v8f32 = fcmp olt <8 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 96 for: %v16f32 = fcmp olt <16 x float> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 12 for: %v2f64 = fcmp olt <2 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 24 for: %v4f64 = fcmp olt <4 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of 48 for: %v8f64 = fcmp olt <8 x double> undef, undef
+; CHECK-MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fcmp'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = fcmp olt <2 x half> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = fcmp olt <4 x half> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = fcmp olt <8 x half> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:66 Lat:68 SizeLat:68 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = fcmp olt <2 x float> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32 = fcmp olt <4 x float> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:34 Lat:36 SizeLat:36 for: %v8f32 = fcmp olt <8 x float> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:68 Lat:72 SizeLat:72 for: %v16f32 = fcmp olt <16 x float> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v2f64 = fcmp olt <2 x double> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = fcmp olt <4 x double> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f64 = fcmp olt <8 x double> undef, undef
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f16 = fcmp olt <2 x half> undef, undef
   %v4f16 = fcmp olt <4 x half> undef, undef
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
index fa18f47..5a23ebf 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
@@ -1,37 +1,37 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @masked_gather() {
 ; CHECK-LABEL: 'masked_gather'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32p = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x ptr> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 96 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 72 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 34 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 192 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 80 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 160 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 72 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 36 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 320 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 144 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 72 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 36 for: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: %V4I32p = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x ptr> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0
 ;
   %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
   %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
@@ -72,32 +72,32 @@ define i32 @masked_gather() {
 
 define i32 @masked_scatter() {
 ; CHECK-LABEL: 'masked_scatter'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 96 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 24 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 12 for: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 72 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 34 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 192 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 80 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 160 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 72 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 36 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 320 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 144 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 72 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 36 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 18 for: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0
 ;
   call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
   call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
@@ -136,29 +136,29 @@ define i32 @masked_scatter() {
 
 define void @gep_v4i32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i32, ptr %base, <4 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i32, ptr %base, <4 x i32> %indsext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepu = getelementptr i32, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indzext = zext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep2 = getelementptr i32, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indsext = sext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep3 = getelementptr i32, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepu = getelementptr i32, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
   %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
@@ -195,29 +195,29 @@ define void @gep_v4i32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4
 
 define void @gep_v4f32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v4f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr float, ptr %base, <4 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr float, ptr %base, <4 x i32> %indsext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gepu = getelementptr float, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indzext = zext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gep2 = getelementptr float, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indsext = sext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gep3 = getelementptr float, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gepu = getelementptr float, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
   %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
@@ -254,27 +254,27 @@ define void @gep_v4f32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4
 
 define void @gep_v4i16(ptr %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v4i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res1, <4 x ptr> %gep1, i32 2, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, ptr %base, <4 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res2, <4 x ptr> %gep2, i32 2, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, ptr %base, <4 x i32> %indsext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res3, <4 x ptr> %gep3, i32 2, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i16, ptr %base, <4 x i16> %ind16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res5zext = zext <4 x i16> %res5 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <4 x i32> %res5zext to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <4 x i16> %res6 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res1, <4 x ptr> %gep1, i32 2, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indzext = zext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep2 = getelementptr i16, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res2, <4 x ptr> %gep2, i32 2, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indsext = sext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep3 = getelementptr i16, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res3, <4 x ptr> %gep3, i32 2, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep5 = getelementptr i16, ptr %base, <4 x i16> %ind16
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res5zext = zext <4 x i16> %res5 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %res5trunc = trunc <4 x i32> %res5zext to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res6sext = sext <4 x i16> %res6 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %res6trunc = trunc <4 x i32> %res6sext to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
   %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
@@ -308,16 +308,16 @@ define void @gep_v4i16(ptr %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %
 
 define void @gep_v4i8(ptr %base, <4 x i8> %ind8, <4 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v4i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res5zext = zext <4 x i8> %res5 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <4 x i32> %res5zext to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res6sext = sext <4 x i8> %res6 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %res5zext = zext <4 x i8> %res5 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %res5trunc = trunc <4 x i32> %res5zext to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %res6sext = sext <4 x i8> %res6 to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %res6trunc = trunc <4 x i32> %res6sext to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   ; result zext
   %gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
@@ -337,36 +337,36 @@ define void @gep_v4i8(ptr %base, <4 x i8> %ind8, <4 x i1> %mask)  {
 
 define void @gep_v8i16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i8> %ind8, <8 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, ptr %base, <8 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, ptr %base, <8 x i32> %indsext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %indzext4 = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i16, ptr %base, <8 x i32> %indzext4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %indtrunc = trunc <8 x i32> %ind32 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %indtrunc, <8 x ptr> %gep4, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %ressext = sext <8 x i16> %res to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %restrunc = trunc <8 x i32> %ressext to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %restrunc, <8 x ptr> %gep4, i32 4, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep2 = getelementptr i16, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indsext = sext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep3 = getelementptr i16, ptr %base, <8 x i32> %indsext
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indzext4 = zext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep4 = getelementptr i16, ptr %base, <8 x i32> %indzext4
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %indtrunc = trunc <8 x i32> %ind32 to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %indtrunc, <8 x ptr> %gep4, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %ressext = sext <8 x i16> %res to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %restrunc = trunc <8 x i32> %ressext to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %restrunc, <8 x ptr> %gep4, i32 4, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   ; no offset ext
   %gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
@@ -418,28 +418,28 @@ define void @gep_v8i16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8
 
 define void @gep_v8f16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v8f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr half, ptr %base, <8 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr half, ptr %base, <8 x i32> %indsext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gep2 = getelementptr half, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indsext = sext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gep3 = getelementptr half, ptr %base, <8 x i32> %indsext
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   ; no offset ext
   %gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
@@ -479,17 +479,17 @@ define void @gep_v8f16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8
 
 define void @gep_v8i8(ptr %base, <8 x i8> %ind8, <8 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v8i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %indzext = zext <8 x i8> %ind8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, ptr %base, <8 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res5zext = zext <8 x i8> %res5 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <8 x i16> %res5zext to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res5trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <8 x i8> %res6 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <8 x i16> %res6sext to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res6trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <8 x i8> %ind8 to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep5 = getelementptr i8, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res5zext = zext <8 x i8> %res5 to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %res5trunc = trunc <8 x i16> %res5zext to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res5trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res6sext = sext <8 x i8> %res6 to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %res6trunc = trunc <8 x i16> %res6sext to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res6trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   ; result zext
   %indzext = zext <8 x i8> %ind8 to <8 x i32>
@@ -510,26 +510,26 @@ define void @gep_v8i8(ptr %base, <8 x i8> %ind8, <8 x i1> %mask)  {
 
 define void @gep_v16i8(ptr %base, ptr %base16, <16 x i8> %ind8, <16 x i32> %ind32, <16 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v16i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res1, <16 x ptr> %gep1, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %indzext = zext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i8, ptr %base, <16 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res2, <16 x ptr> %gep2, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %indsext = sext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i8, ptr %base, <16 x i32> %indsext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res3, <16 x ptr> %gep3, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <16 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <16 x ptr> %gepbs to <16 x ptr>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbsb, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %indzext4 = zext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %indtrunc, <16 x ptr> %gep4, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res1, <16 x ptr> %gep1, i32 2, <16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <16 x i8> %ind8 to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep2 = getelementptr i8, ptr %base, <16 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res2, <16 x ptr> %gep2, i32 2, <16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %indsext = sext <16 x i8> %ind8 to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gep3 = getelementptr i8, ptr %base, <16 x i32> %indsext
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res3, <16 x ptr> %gep3, i32 2, <16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbs = getelementptr i16, ptr %base16, <16 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbsb = bitcast <16 x ptr> %gepbs to <16 x ptr>
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbsb, i32 2, <16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %indzext4 = zext <16 x i8> %ind8 to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
+; CHECK-NEXT:  Cost Model: Found costs of 32 for: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %indtrunc, <16 x ptr> %gep4, i32 2, <16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   ; no offset ext
   %gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
@@ -565,10 +565,10 @@ define void @gep_v16i8(ptr %base, ptr %base16, <16 x i8> %ind8, <16 x i32> %ind3
 
 define void @gep_v16i8p(<16 x ptr> %base, i32 %off, <16 x i1> %mask)  {
 ; CHECK-LABEL: 'gep_v16i8p'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbs, i32 2, <16 x i1> %mask)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbs, i32 2, <16 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
   %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
index 01341e4..e4cc8fe 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=MVE-RECIP,MVEI-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=MVE-SIZE,MVEI-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE-RECIP,MVEF-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE-SIZE,MVEF-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=MVE,MVEI
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE,MVEF
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
@@ -33,55 +31,30 @@ declare <32 x i8>  @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.smin.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @smin(i32 %arg) {
-; MVE-RECIP-LABEL: 'smin'
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'smin'
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'smin'
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -140,55 +113,30 @@ declare <32 x i8>  @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.smax.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @smax(i32 %arg) {
-; MVE-RECIP-LABEL: 'smax'
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'smax'
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'smax'
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -248,55 +196,30 @@ declare <32 x i8>  @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.umin.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @umin(i32 %arg) {
-; MVE-RECIP-LABEL: 'umin'
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'umin'
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'umin'
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -354,56 +277,31 @@ declare <16 x i8>  @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
 declare <32 x i8>  @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.umax.v64i8(<64 x i8>, <64 x i8>)
 
-define i32 @sub(i32 %arg) {
-; MVE-RECIP-LABEL: 'sub'
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'sub'
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+define i32 @umax(i32 %arg) {
+; MVE-LABEL: 'umax'
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -455,77 +353,41 @@ declare <16 x half> @llvm.minnum.v16f16(<16 x half>, <16 x half>)
 declare <32 x half> @llvm.minnum.v32f16(<32 x half>, <32 x half>)
 
 define float @minnum(float %arg) {
-; MVEI-RECIP-LABEL: 'minnum'
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
+; MVEI-LABEL: 'minnum'
+; MVEI-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.minnum.f64(double undef, double undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F16 = call half @llvm.minnum.f16(half undef, half undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:352 CodeSize:64 Lat:352 SizeLat:352 for: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
 ;
-; MVEI-SIZE-LABEL: 'minnum'
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
-;
-; MVEF-RECIP-LABEL: 'minnum'
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
-;
-; MVEF-SIZE-LABEL: 'minnum'
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
+; MVEF-LABEL: 'minnum'
+; MVEF-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.minnum.f64(double undef, double undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEF-NEXT:  Cost Model: Found costs of 1 for: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of 1 for: %F16 = call half @llvm.minnum.f16(half undef, half undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
 ;
   %F64 = call double @llvm.minnum.f64(double undef, double undef)
   %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
@@ -567,77 +429,41 @@ declare <16 x half> @llvm.maxnum.v16f16(<16 x half>, <16 x half>)
 declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>)
 
 define float @maxnum(float %arg) {
-; MVEI-RECIP-LABEL: 'maxnum'
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
-;
-; MVEI-SIZE-LABEL: 'maxnum'
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
-;
-; MVEF-RECIP-LABEL: 'maxnum'
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float undef
+; MVEI-LABEL: 'maxnum'
+; MVEI-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:352 CodeSize:64 Lat:352 SizeLat:352 for: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
 ;
-; MVEF-SIZE-LABEL: 'maxnum'
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret float undef
+; MVEF-LABEL: 'maxnum'
+; MVEF-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEF-NEXT:  Cost Model: Found costs of 1 for: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEF-NEXT:  Cost Model: Found costs of 1 for: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEF-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
 ;
   %F64 = call double @llvm.maxnum.f64(double undef, double undef)
   %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll b/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll
index 6a327cf..bd85461 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll
@@ -1,60 +1,60 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
 
 define void @vld2(ptr %p) {
 ; CHECK-LABEL: 'vld2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <4 x i8>, ptr %p, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <8 x i8>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = load <16 x i8>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = load <32 x i8>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <4 x i16>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = load <8 x i16>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = load <16 x i16>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i16 = load <32 x i16>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = load <4 x i32>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = load <8 x i32>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = load <16 x i32>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i32 = load <32 x i32>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <4 x i64>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = load <8 x i64>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v4i64_0 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v4i64_1 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i64 = load <16 x i64>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v8i64_0 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v8i64_1 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i64 = load <32 x i64>, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1536 for instruction: %v16i64_0 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1536 for instruction: %v16i64_1 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <4 x i8>, ptr %p, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <8 x i8>, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <16 x i8>, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v32i8 = load <32 x i8>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <4 x i16>, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <8 x i16>, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <16 x i16>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v32i16 = load <32 x i16>, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <4 x i32>, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <8 x i32>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <16 x i32>, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v32i32 = load <32 x i32>, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <4 x i64>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <8 x i64>, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v4i64_0 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v4i64_1 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <16 x i64>, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v8i64_0 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v8i64_1 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <32 x i64>, ptr %p, align 256
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1536 CodeSize:768 Lat:1536 SizeLat:1536 for: %v16i64_0 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1536 CodeSize:768 Lat:1536 SizeLat:1536 for: %v16i64_1 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v4i8 = load <4 x i8>, ptr %p
   %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
@@ -114,71 +114,71 @@ define void @vld2(ptr %p) {
 
 define void @vld3(ptr %p) {
 ; CHECK-LABEL: 'vld3'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2i8 = load <6 x i8>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i8_1 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i8_2 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %v4i8 = load <12 x i8>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i8_0 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <24 x i8>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <48 x i8>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2i16 = load <6 x i16>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <12 x i16>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <24 x i16>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <48 x i16>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <6 x i32>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <12 x i32>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <24 x i32>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <48 x i32>, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <6 x i64>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v2i64_2 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <12 x i64>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v4i64_0 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v4i64_1 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v4i64_2 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <24 x i64>, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v8i64_0 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v8i64_1 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v8i64_2 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <48 x i64>, ptr %p, align 512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: %v16i64_0 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: %v16i64_1 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: %v16i64_2 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:50 CodeSize:1 Lat:4 SizeLat:1 for: %v2i8 = load <6 x i8>, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i8_1 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i8_2 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:98 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <12 x i8>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i8_0 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <24 x i8>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <48 x i8>, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:50 CodeSize:1 Lat:4 SizeLat:1 for: %v2i16 = load <6 x i16>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <12 x i16>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <24 x i16>, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <48 x i16>, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i32 = load <6 x i32>, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <12 x i32>, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <24 x i32>, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <48 x i32>, ptr %p, align 256
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <6 x i64>, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v2i64_2 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <12 x i64>, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v4i64_0 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v4i64_1 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v4i64_2 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <24 x i64>, ptr %p, align 256
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v8i64_0 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v8i64_1 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v8i64_2 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <48 x i64>, ptr %p, align 512
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v16i64_0 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v16i64_1 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v16i64_2 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2i8 = load <6 x i8>, ptr %p
   %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3>
@@ -252,88 +252,171 @@ define void @vld3(ptr %p) {
 }
 
 define void @vld4(ptr %p) {
-; CHECK-LABEL: 'vld4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-UF2-LABEL: 'vld4'
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i8 = load <8 x i8>, ptr %p, align 8
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <16 x i8>, ptr %p, align 8
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <32 x i8>, ptr %p, align 32
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <64 x i8>, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i16 = load <8 x i16>, ptr %p, align 8
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <16 x i16>, ptr %p, align 32
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <32 x i16>, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <64 x i16>, ptr %p, align 128
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i32 = load <8 x i32>, ptr %p, align 32
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <16 x i32>, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <32 x i32>, ptr %p, align 128
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <64 x i32>, ptr %p, align 256
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <8 x i64>, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <16 x i64>, ptr %p, align 128
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <32 x i64>, ptr %p, align 256
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <64 x i64>, ptr %p, align 512
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-UF4-LABEL: 'vld4'
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i8 = load <8 x i8>, ptr %p, align 8
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <16 x i8>, ptr %p, align 8
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <32 x i8>, ptr %p, align 32
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <64 x i8>, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i16 = load <8 x i16>, ptr %p, align 8
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <16 x i16>, ptr %p, align 32
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <32 x i16>, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <64 x i16>, ptr %p, align 128
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i32 = load <8 x i32>, ptr %p, align 32
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <16 x i32>, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <32 x i32>, ptr %p, align 128
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <64 x i32>, ptr %p, align 256
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <8 x i64>, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <16 x i64>, ptr %p, align 128
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <32 x i64>, ptr %p, align 256
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <64 x i64>, ptr %p, align 512
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2i8 = load <8 x i8>, ptr %p
   %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
@@ -424,39 +507,39 @@ define void @vld4(ptr %p) {
 
 define void @vst2(ptr %p) {
 ; CHECK-LABEL: 'vst2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4i8, ptr %p, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v4i16, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v4i32, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i64> %v4i64, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i8> %v4i8, ptr %p, align 4
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i8> %v8i8, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> %v16i8, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i8> %v32i8, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i16> %v4i16, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> %v8i16, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i16> %v16i16, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i16> %v32i16, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> %v4i32, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i32> %v8i32, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i32> %v16i32, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i32> %v32i32, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i64> %v4i64, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i64> %v8i64, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i64> %v16i64, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i64> %v32i64, ptr %p, align 256
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
   store <4 x i8> %v4i8, ptr %p
@@ -500,39 +583,39 @@ define void @vst2(ptr %p) {
 
 define void @vst3(ptr %p) {
 ; CHECK-LABEL: 'vst3'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: store <6 x i8> %v8i8, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: store <12 x i8> %v16i8, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <24 x i8> %v32i8, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v64i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <48 x i8> %v64i8, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: store <6 x i16> %v8i16, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <12 x i16> %v16i16, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <24 x i16> %v32i16, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <48 x i16> %v64i16, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <6 x i32> %v8i32, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <12 x i32> %v16i32, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <24 x i32> %v32i32, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: store <48 x i32> %v64i32, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <6 x i64> %v8i64, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <12 x i64> %v16i64, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: store <24 x i64> %v32i64, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1536 for instruction: %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: store <48 x i64> %v64i64, ptr %p, align 512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:96 CodeSize:48 Lat:96 SizeLat:96 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i8> %v8i8, ptr %p, align 8
+; CHECK-NEXT:  Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:98 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i8> %v16i8, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i8> %v32i8, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v64i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i8> %v64i8, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:96 CodeSize:48 Lat:96 SizeLat:96 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i16> %v8i16, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i16> %v16i16, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i16> %v32i16, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i16> %v64i16, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:96 CodeSize:48 Lat:96 SizeLat:96 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i32> %v8i32, ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i32> %v16i32, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i32> %v32i32, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i32> %v64i32, ptr %p, align 256
+; CHECK-NEXT:  Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i64> %v8i64, ptr %p, align 64
+; CHECK-NEXT:  Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i64> %v16i64, ptr %p, align 128
+; CHECK-NEXT:  Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i64> %v32i64, ptr %p, align 256
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1536 CodeSize:768 Lat:1536 SizeLat:1536 for: %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i64> %v64i64, ptr %p, align 512
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
   store <6 x i8> %v8i8, ptr %p
@@ -575,40 +658,75 @@ define void @vst3(ptr %p) {
 
 
 define void @vst4(ptr %p) {
-; CHECK-LABEL: 'vst4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <64 x i8> %v64i8, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <64 x i16> %v64i16, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: store <64 x i32> %v64i32, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: store <64 x i64> %v64i64, ptr %p, align 512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-UF2-LABEL: 'vst4'
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i8> %v8i8, ptr %p, align 8
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> %v16i8, ptr %p, align 8
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i8> %v32i8, ptr %p, align 32
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i8> %v64i8, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> %v8i16, ptr %p, align 8
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i16> %v16i16, ptr %p, align 32
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i16> %v32i16, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i16> %v64i16, ptr %p, align 128
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i32> %v8i32, ptr %p, align 32
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i32> %v16i32, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i32> %v32i32, ptr %p, align 128
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i32> %v64i32, ptr %p, align 256
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i64> %v8i64, ptr %p, align 64
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i64> %v16i64, ptr %p, align 128
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i64> %v32i64, ptr %p, align 256
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i64> %v64i64, ptr %p, align 512
+; CHECK-UF2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-UF4-LABEL: 'vst4'
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i8> %v8i8, ptr %p, align 8
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> %v16i8, ptr %p, align 8
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i8> %v32i8, ptr %p, align 32
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i8> %v64i8, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> %v8i16, ptr %p, align 8
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i16> %v16i16, ptr %p, align 32
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i16> %v32i16, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i16> %v64i16, ptr %p, align 128
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i32> %v8i32, ptr %p, align 32
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i32> %v16i32, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i32> %v32i32, ptr %p, align 128
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i32> %v64i32, ptr %p, align 256
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i64> %v8i64, ptr %p, align 64
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i64> %v16i64, ptr %p, align 128
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i64> %v32i64, ptr %p, align 256
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i64> %v64i64, ptr %p, align 512
+; CHECK-UF4-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
   store <8 x i8> %v8i8, ptr %p
@@ -648,6 +766,3 @@ define void @vst4(ptr %p) {
 
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-UF2: {{.*}}
-; CHECK-UF4: {{.*}}
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll
index 8edc2e6..5f1bce9 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @add_i8() {
 ; CHECK-LABEL: 'add_i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
 
@@ -27,32 +27,32 @@ define void @add_i8() {
 
 define void @add_i16() {
 ; CHECK-LABEL: 'add_i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3za = zext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sa = sext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0za = zext <1 x i8> undef to <1 x i16>
   %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
@@ -99,52 +99,52 @@ define void @add_i16() {
 
 define void @add_i32() {
 ; CHECK-LABEL: 'add_i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2za = zext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2sa = sext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a6za = zext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a6sa = sext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7za = zext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sa = sext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0za = zext <1 x i8> undef to <1 x i32>
   %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
@@ -221,72 +221,72 @@ define void @add_i32() {
 
 define void @add_i64() {
 ; CHECK-LABEL: 'add_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a1za = zext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a1sa = sext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:298 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1322 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a6za = zext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a6sa = sext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a7za = zext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a7sa = sext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:296 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1320 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10za = zext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10sa = sext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a11za = zext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %a11sa = sext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a12za = zext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:1 Lat:1 SizeLat:1 for: %a12sa = sext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %a13za = zext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:264 CodeSize:1 Lat:1 SizeLat:1 for: %a13sa = sext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:288 CodeSize:1 Lat:1 SizeLat:1 for: %a14za = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1056 CodeSize:1 Lat:1 SizeLat:1 for: %a14sa = sext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0za = zext <1 x i8> undef to <1 x i64>
   %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
@@ -393,17 +393,17 @@ define void @add_i64() {
 
 define void @mla_i8() {
 ; CHECK-LABEL: 'mla_i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0m = mul <1 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a1m = mul <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a4m = mul <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0m = mul <1 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %a1m = mul <2 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2m = mul <4 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3m = mul <8 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a4m = mul <16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0m = mul <1 x i8> undef, undef
   %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
@@ -425,57 +425,57 @@ define void @mla_i8() {
 
 define void @mla_i16() {
 ; CHECK-LABEL: 'mla_i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i16> %a0za, %a0zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i16> %a0sa, %a0sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sb = sext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i16> %a2sa, %a2sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3zb = zext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3zm = mul <8 x i16> %a3za, %a3zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sb = sext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3sm = mul <8 x i16> %a3sa, %a3sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4zb = zext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4zm = mul <16 x i16> %a4za, %a4zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a4sb = sext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a6m = mul <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a9m = mul <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0zb = zext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0zm = mul <1 x i16> %a0za, %a0zb
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sb = sext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0sm = mul <1 x i16> %a0sa, %a0sb
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1zb = zext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 26 for: %a1zm = mul <2 x i16> %a1za, %a1zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sb = sext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 26 for: %a1sm = mul <2 x i16> %a1sa, %a1sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2zb = zext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2zm = mul <4 x i16> %a2za, %a2zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2sb = sext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2sm = mul <4 x i16> %a2sa, %a2sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3za = zext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3zb = zext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3zm = mul <8 x i16> %a3za, %a3zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sa = sext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sb = sext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sm = mul <8 x i16> %a3sa, %a3sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4zb = zext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4zm = mul <16 x i16> %a4za, %a4zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4sb = sext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4sm = mul <16 x i16> %a4sa, %a4sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a5m = mul <1 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %a6m = mul <2 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7m = mul <4 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a8m = mul <8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a9m = mul <16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0za = zext <1 x i8> undef to <1 x i16>
   %a0zb = zext <1 x i8> undef to <1 x i16>
@@ -547,97 +547,97 @@ define void @mla_i16() {
 
 define void @mla_i32() {
 ; CHECK-LABEL: 'mla_i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i32> %a0za, %a0zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i32> %a0sa, %a0sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a2sb = sext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i32> %a2sa, %a2sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3zb = zext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3zm = mul <8 x i32> %a3za, %a3zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a3sb = sext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3sm = mul <8 x i32> %a3sa, %a3sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4zb = zext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4zm = mul <16 x i32> %a4za, %a4zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a4sb = sext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4sm = mul <16 x i32> %a4sa, %a4sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sb = sext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a5sm = mul <1 x i32> %a5sa, %a5sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a6zb = zext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6sb = sext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sb = sext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7sm = mul <4 x i32> %a7sa, %a7sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8zb = zext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8zm = mul <8 x i32> %a8za, %a8zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a8sb = sext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8sm = mul <8 x i32> %a8sa, %a8sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9zb = zext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9zm = mul <16 x i32> %a9za, %a9zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a9sb = sext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %a11m = mul <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a14m = mul <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0zb = zext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0zm = mul <1 x i32> %a0za, %a0zb
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sb = sext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0sm = mul <1 x i32> %a0sa, %a0sb
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1zb = zext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 26 for: %a1zm = mul <2 x i32> %a1za, %a1zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sb = sext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 26 for: %a1sm = mul <2 x i32> %a1sa, %a1sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2za = zext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2zb = zext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2zm = mul <4 x i32> %a2za, %a2zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2sa = sext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2sb = sext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2sm = mul <4 x i32> %a2sa, %a2sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3zb = zext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3zm = mul <8 x i32> %a3za, %a3zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3sb = sext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3sm = mul <8 x i32> %a3sa, %a3sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4zb = zext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4zm = mul <16 x i32> %a4za, %a4zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4sb = sext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4sm = mul <16 x i32> %a4sa, %a4sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5zb = zext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a5zm = mul <1 x i32> %a5za, %a5zb
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5sb = sext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a5sm = mul <1 x i32> %a5sa, %a5sb
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a6za = zext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a6zb = zext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 26 for: %a6zm = mul <2 x i32> %a6za, %a6zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a6sa = sext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a6sb = sext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 26 for: %a6sm = mul <2 x i32> %a6sa, %a6sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7za = zext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7zb = zext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7zm = mul <4 x i32> %a7za, %a7zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sa = sext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sb = sext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sm = mul <4 x i32> %a7sa, %a7sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8zb = zext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8zm = mul <8 x i32> %a8za, %a8zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8sb = sext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8sm = mul <8 x i32> %a8sa, %a8sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9zb = zext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9zm = mul <16 x i32> %a9za, %a9zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9sb = sext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9sm = mul <16 x i32> %a9sa, %a9sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a10m = mul <1 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
+; CHECK-NEXT:  Cost Model: Found costs of 10 for: %a11m = mul <2 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a12m = mul <4 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a13m = mul <8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a14m = mul <16 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0za = zext <1 x i8> undef to <1 x i32>
   %a0zb = zext <1 x i8> undef to <1 x i32>
@@ -759,137 +759,137 @@ define void @mla_i32() {
 
 define void @mla_i64() {
 ; CHECK-LABEL: 'mla_i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0zm = mul <1 x i64> %a0za, %a0zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a0sm = mul <1 x i64> %a0sa, %a0sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a1zb = zext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a1sb = sext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a2zb = zext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a2sb = sext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a3zb = zext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a3zm = mul <8 x i64> %a3za, %a3zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a3sb = sext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a3sm = mul <8 x i64> %a3sa, %a3sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %a4zb = zext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1322 for instruction: %a4sb = sext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5zm = mul <1 x i64> %a5za, %a5zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a5sm = mul <1 x i64> %a5sa, %a5sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a6zb = zext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a6sb = sext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a7zb = zext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %a7sb = sext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %a8zb = zext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a8zm = mul <8 x i64> %a8za, %a8zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %a8sb = sext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a8sm = mul <8 x i64> %a8sa, %a8sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 296 for instruction: %a9zb = zext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1320 for instruction: %a9sb = sext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a10zm = mul <1 x i64> %a10za, %a10zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a10sm = mul <1 x i64> %a10sa, %a10sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a11zb = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a11sb = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %a12zb = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %a12zm = mul <4 x i64> %a12za, %a12zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %a12sb = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %a13zb = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a13zm = mul <8 x i64> %a13za, %a13zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %a13sb = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a13sm = mul <8 x i64> %a13sa, %a13sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %a14zb = zext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %a14zm = mul <16 x i64> %a14za, %a14zb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1056 for instruction: %a14sb = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a15m = mul <1 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %a16m = mul <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %a17m = mul <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %a18m = mul <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %a19m = mul <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0zb = zext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a0zm = mul <1 x i64> %a0za, %a0zb
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0sb = sext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a0sm = mul <1 x i64> %a0sa, %a0sb
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a1za = zext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a1zb = zext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 52 for: %a1zm = mul <2 x i64> %a1za, %a1zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a1sa = sext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a1sb = sext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 52 for: %a1sm = mul <2 x i64> %a1sa, %a1sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a2zb = zext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 104 for: %a2zm = mul <4 x i64> %a2za, %a2zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a2sb = sext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 104 for: %a2sm = mul <4 x i64> %a2sa, %a2sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a3zb = zext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 208 for: %a3zm = mul <8 x i64> %a3za, %a3zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a3sb = sext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 208 for: %a3sm = mul <8 x i64> %a3sa, %a3sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:298 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:298 CodeSize:1 Lat:1 SizeLat:1 for: %a4zb = zext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 416 for: %a4zm = mul <16 x i64> %a4za, %a4zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1322 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1322 CodeSize:1 Lat:1 SizeLat:1 for: %a4sb = sext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 416 for: %a4sm = mul <16 x i64> %a4sa, %a4sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a5zb = zext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a5zm = mul <1 x i64> %a5za, %a5zb
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %a5sb = sext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a5sm = mul <1 x i64> %a5sa, %a5sb
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a6za = zext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a6zb = zext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 52 for: %a6zm = mul <2 x i64> %a6za, %a6zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a6sa = sext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a6sb = sext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 52 for: %a6sm = mul <2 x i64> %a6sa, %a6sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a7za = zext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a7zb = zext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 104 for: %a7zm = mul <4 x i64> %a7za, %a7zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a7sa = sext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a7sb = sext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 104 for: %a7sm = mul <4 x i64> %a7sa, %a7sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a8zb = zext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 208 for: %a8zm = mul <8 x i64> %a8za, %a8zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a8sb = sext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 208 for: %a8sm = mul <8 x i64> %a8sa, %a8sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:296 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:296 CodeSize:1 Lat:1 SizeLat:1 for: %a9zb = zext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 416 for: %a9zm = mul <16 x i64> %a9za, %a9zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1320 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1320 CodeSize:1 Lat:1 SizeLat:1 for: %a9sb = sext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 416 for: %a9sm = mul <16 x i64> %a9sa, %a9sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10za = zext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10zb = zext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a10zm = mul <1 x i64> %a10za, %a10zb
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10sa = sext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10sb = sext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a10sm = mul <1 x i64> %a10sa, %a10sb
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a11za = zext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a11zb = zext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 52 for: %a11zm = mul <2 x i64> %a11za, %a11zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %a11sa = sext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %a11sb = sext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 52 for: %a11sm = mul <2 x i64> %a11sa, %a11sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a12za = zext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a12zb = zext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 104 for: %a12zm = mul <4 x i64> %a12za, %a12zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:1 Lat:1 SizeLat:1 for: %a12sa = sext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:1 Lat:1 SizeLat:1 for: %a12sb = sext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 104 for: %a12sm = mul <4 x i64> %a12sa, %a12sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %a13za = zext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %a13zb = zext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 208 for: %a13zm = mul <8 x i64> %a13za, %a13zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:264 CodeSize:1 Lat:1 SizeLat:1 for: %a13sa = sext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:264 CodeSize:1 Lat:1 SizeLat:1 for: %a13sb = sext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 208 for: %a13sm = mul <8 x i64> %a13sa, %a13sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:288 CodeSize:1 Lat:1 SizeLat:1 for: %a14za = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:288 CodeSize:1 Lat:1 SizeLat:1 for: %a14zb = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 416 for: %a14zm = mul <16 x i64> %a14za, %a14zb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1056 CodeSize:1 Lat:1 SizeLat:1 for: %a14sa = sext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:1056 CodeSize:1 Lat:1 SizeLat:1 for: %a14sb = sext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 416 for: %a14sm = mul <16 x i64> %a14sa, %a14sb
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a15m = mul <1 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 8 for: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
+; CHECK-NEXT:  Cost Model: Found costs of 20 for: %a16m = mul <2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
+; CHECK-NEXT:  Cost Model: Found costs of 40 for: %a17m = mul <4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
+; CHECK-NEXT:  Cost Model: Found costs of 80 for: %a18m = mul <8 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
+; CHECK-NEXT:  Cost Model: Found costs of 160 for: %a19m = mul <16 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %a0za = zext <1 x i8> undef to <1 x i64>
   %a0zb = zext <1 x i8> undef to <1 x i64>
diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll
index bdd8540..e64bce2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput -intrinsic-cost-strategy=type-based-intrinsic-cost 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput -intrinsic-cost-strategy=type-based-intrinsic-cost 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput -intrinsic-cost-strategy=type-based-intrinsic-cost 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,ZVE64D
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput -intrinsic-cost-strategy=type-based-intrinsic-cost 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,ZVE64D
+; RUN: opt < %s -mtriple=riscv64 -mattr=+zvl128b,+zve64x -passes="print<cost-model>" -cost-kind=throughput -intrinsic-cost-strategy=type-based-intrinsic-cost 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,ZVE64X
 
 define void @sext() {
 ; CHECK-LABEL: 'sext'
@@ -2021,92 +2022,179 @@ define void @trunc() {
 }
 
 define void @fpext() {
-; CHECK-LABEL: 'fpext'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2f64 = fpext <2 x half> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f64 = fpext <2 x float> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2f32 = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4f32 = fpext <4 x half> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4f32 = call <4 x float> @llvm.vp.fpext.v4f32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8f32 = call <8 x float> @llvm.vp.fpext.v8f32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f16_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f16_v16f32 = call <16 x float> @llvm.vp.fpext.v16f32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f16_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f16_v32f32 = call <32 x float> @llvm.vp.fpext.v32f32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f16_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64f16_v64f32 = call <64 x float> @llvm.vp.fpext.v64f32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f16_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128f16_v128f32 = call <128 x float> @llvm.vp.fpext.v128f32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128f16_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1f32 = fpext <vscale x 1 x half> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1f64 = fpext <vscale x 1 x half> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f64 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32f16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fpext.nxv32f32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64f16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fpext.nxv64f32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64f16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'fpext'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2f64 = fpext <2 x half> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f64 = fpext <2 x float> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2f32 = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4f32 = fpext <4 x half> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4f32 = call <4 x float> @llvm.vp.fpext.v4f32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8f32 = call <8 x float> @llvm.vp.fpext.v8f32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f16_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f16_v16f32 = call <16 x float> @llvm.vp.fpext.v16f32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f16_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f16_v32f32 = call <32 x float> @llvm.vp.fpext.v32f32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f16_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64f16_v64f32 = call <64 x float> @llvm.vp.fpext.v64f32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f16_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128f16_v128f32 = call <128 x float> @llvm.vp.fpext.v128f32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128f16_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1f32 = fpext <vscale x 1 x half> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1f64 = fpext <vscale x 1 x half> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f64 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32f16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fpext.nxv32f32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64f16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fpext.nxv64f32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64f16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'fpext'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2f64 = fpext <2 x half> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2f64 = fpext <2 x float> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2f32 = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16_v4f32 = fpext <4 x half> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4f16_v4f32 = call <4 x float> @llvm.vp.fpext.v4f32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4f16_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4f32_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8f16_v8f32 = call <8 x float> @llvm.vp.fpext.v8f32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8f16_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8f32_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16f16_v16f32 = call <16 x float> @llvm.vp.fpext.v16f32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16f16_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16f32_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v32f16_v32f32 = call <32 x float> @llvm.vp.fpext.v32f32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v32f16_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v32f32_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v64f16_v64f32 = call <64 x float> @llvm.vp.fpext.v64f32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v64f16_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v64f32_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vp_v128f16_v128f32 = call <128 x float> @llvm.vp.fpext.v128f32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vp_v128f16_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vp_v128f32_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f16_nxv1f32 = fpext <vscale x 1 x half> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f16_nxv1f64 = fpext <vscale x 1 x half> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1f64 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f16_nxv2f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fpext.nxv32f32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fpext.nxv64f32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
   %v2f16_v2f64 = fpext <2 x half> undef to <2 x double>
@@ -2224,92 +2312,179 @@ define void @fpext() {
 }
 
 define void @fptrunc() {
-; CHECK-LABEL: 'fptrunc'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2f16 = fptrunc <2 x double> undef to <2 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2f32 = fptrunc <2 x double> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2f32 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4f16 = fptrunc <4 x float> undef to <4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4f16 = fptrunc <4 x double> undef to <4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4f32 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4f32 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8f16 = fptrunc <8 x float> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8f32 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f64_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f64_v16f32 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f32_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f64_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f64_v32f32 = call <32 x float> @llvm.vp.fptrunc.v32f32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f32_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f64_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f64_v64f32 = call <64 x float> @llvm.vp.fptrunc.v64f32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f32_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f64_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f64_v128f32 = call <128 x float> @llvm.vp.fptrunc.v128f32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f16 = fptrunc <vscale x 1 x float> undef to <vscale x 1 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1f16 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1f32 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv1f16 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv1f16 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv1f32 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4f16 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4f16 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4f32 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8f16 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8f16 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8f32 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16f16 = fptrunc <vscale x 16 x float> undef to <vscale x 16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16f16 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16f32 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32f16 = fptrunc <vscale x 32 x float> undef to <vscale x 32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32f16 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32f32 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64f16 = fptrunc <vscale x 64 x float> undef to <vscale x 64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64f16 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64f32 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fptrunc.nxv64f32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'fptrunc'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2f16 = fptrunc <2 x double> undef to <2 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2f32 = fptrunc <2 x double> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2f32 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4f16 = fptrunc <4 x float> undef to <4 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4f16 = fptrunc <4 x double> undef to <4 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4f32 = fptrunc <4 x double> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4f32 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8f16 = fptrunc <8 x float> undef to <8 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8f32 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f64_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f64_v16f32 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f32_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f64_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f64_v32f32 = call <32 x float> @llvm.vp.fptrunc.v32f32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f32_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f64_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f64_v64f32 = call <64 x float> @llvm.vp.fptrunc.v64f32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f32_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f64_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f64_v128f32 = call <128 x float> @llvm.vp.fptrunc.v128f32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f16 = fptrunc <vscale x 1 x float> undef to <vscale x 1 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1f16 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1f32 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv1f16 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv1f16 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv1f32 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4f16 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4f16 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4f32 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8f16 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8f16 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8f32 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16f16 = fptrunc <vscale x 16 x float> undef to <vscale x 16 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16f16 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16f32 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32f16 = fptrunc <vscale x 32 x float> undef to <vscale x 32 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32f16 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32f32 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64f16 = fptrunc <vscale x 64 x float> undef to <vscale x 64 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64f16 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x half>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64f32 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fptrunc.nxv64f32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'fptrunc'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2f16 = fptrunc <2 x double> undef to <2 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2f32 = fptrunc <2 x double> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2f32 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32_v4f16 = fptrunc <4 x float> undef to <4 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64_v4f16 = fptrunc <4 x double> undef to <4 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64_v4f32 = fptrunc <4 x double> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4f32_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4f64_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4f64_v4f32 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32_v8f16 = fptrunc <8 x float> undef to <8 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8f32_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8f64_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8f64_v8f32 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16f32_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16f64_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16f64_v16f32 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v32f32_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v32f64_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v32f64_v32f32 = call <32 x float> @llvm.vp.fptrunc.v32f32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v64f32_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v64f64_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v64f64_v64f32 = call <64 x float> @llvm.vp.fptrunc.v64f32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vp_v128f32_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vp_v128f64_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %vp_v128f64_v128f32 = call <128 x float> @llvm.vp.fptrunc.v128f32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1f16 = fptrunc <vscale x 1 x float> undef to <vscale x 1 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1f16 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1f32 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv1f16 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv1f16 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv1f32 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4f16 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4f16 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4f32 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8f16 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8f16 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8f32 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16f16 = fptrunc <vscale x 16 x float> undef to <vscale x 16 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16f16 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16f32 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32f16 = fptrunc <vscale x 32 x float> undef to <vscale x 32 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32f16 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32f32 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64f16 = fptrunc <vscale x 64 x float> undef to <vscale x 64 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64f16 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x half>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64f32 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fptrunc.nxv64f32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half>
   %v2f64_v2f16 = fptrunc <2 x double> undef to <2 x half>
@@ -2427,288 +2602,571 @@ define void @fptrunc() {
 }
 
 define void @fptosi() {
-; CHECK-LABEL: 'fptosi'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptosi <2 x float> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptosi <2 x double> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptosi <2 x float> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptosi <2 x double> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptosi <2 x float> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptosi <2 x double> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptosi <2 x float> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptosi <2 x double> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptosi <4 x float> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptosi <4 x double> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptosi <4 x float> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptosi <4 x double> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptosi <8 x float> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptosi <32 x double> undef to <32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptosi <64 x float> undef to <64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64i32 = fptosi <64 x double> undef to <64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64i64 = fptosi <64 x float> undef to <64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptosi <64 x double> undef to <64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128i16 = fptosi <128 x float> undef to <128 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128i16 = fptosi <128 x double> undef to <128 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptosi <128 x float> undef to <128 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128i32 = fptosi <128 x double> undef to <128 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128i64 = fptosi <128 x float> undef to <128 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptosi <128 x double> undef to <128 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4i64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8i16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f32_nxv8i32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8i32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8i64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f64_nxv8i64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16i16 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16i16 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f32_nxv16i32 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16i32 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16i64 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32i16 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32i16 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32i32 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32i64 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64i16 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64i16 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64i32 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64i64 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64f64_nxv64i64 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'fptosi'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptosi <2 x float> undef to <2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptosi <2 x double> undef to <2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptosi <2 x float> undef to <2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptosi <2 x double> undef to <2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptosi <2 x float> undef to <2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptosi <2 x double> undef to <2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptosi <2 x float> undef to <2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptosi <2 x double> undef to <2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptosi <4 x float> undef to <4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptosi <4 x double> undef to <4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptosi <4 x float> undef to <4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptosi <4 x double> undef to <4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptosi <8 x float> undef to <8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptosi <32 x double> undef to <32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptosi <64 x float> undef to <64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64i32 = fptosi <64 x double> undef to <64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64i64 = fptosi <64 x float> undef to <64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptosi <64 x double> undef to <64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128i16 = fptosi <128 x float> undef to <128 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128i16 = fptosi <128 x double> undef to <128 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptosi <128 x float> undef to <128 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128i32 = fptosi <128 x double> undef to <128 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128i64 = fptosi <128 x float> undef to <128 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptosi <128 x double> undef to <128 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4i64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8i16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f32_nxv8i32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8i32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8i64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f64_nxv8i64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16i16 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16i16 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f32_nxv16i32 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16i32 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16i64 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32i16 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32i16 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32i32 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32i64 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64i16 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64i16 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64i32 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64i64 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64f64_nxv64i64 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'fptosi'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f32_v2i8 = fptosi <2 x float> undef to <2 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f64_v2i8 = fptosi <2 x double> undef to <2 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f32_v2i16 = fptosi <2 x float> undef to <2 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f64_v2i16 = fptosi <2 x double> undef to <2 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f32_v2i32 = fptosi <2 x float> undef to <2 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f64_v2i32 = fptosi <2 x double> undef to <2 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i64 = fptosi <2 x float> undef to <2 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i64 = fptosi <2 x double> undef to <2 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f32_v4i16 = fptosi <4 x float> undef to <4 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f64_v4i16 = fptosi <4 x double> undef to <4 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f32_v4i32 = fptosi <4 x float> undef to <4 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f64_v4i32 = fptosi <4 x double> undef to <4 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f32_v8i16 = fptosi <8 x float> undef to <8 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32f64_v32i64 = fptosi <32 x double> undef to <32 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v64f32_v64i32 = fptosi <64 x float> undef to <64 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v64f64_v64i32 = fptosi <64 x double> undef to <64 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64f32_v64i64 = fptosi <64 x float> undef to <64 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64f64_v64i64 = fptosi <64 x double> undef to <64 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %v128f32_v128i16 = fptosi <128 x float> undef to <128 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %v128f64_v128i16 = fptosi <128 x double> undef to <128 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %v128f32_v128i32 = fptosi <128 x float> undef to <128 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %v128f64_v128i32 = fptosi <128 x double> undef to <128 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128f32_v128i64 = fptosi <128 x float> undef to <128 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128f64_v128i64 = fptosi <128 x double> undef to <128 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i32 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i64 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i16 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i16 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i32 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i32 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i64 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i64 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i16 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i16 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i32 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i32 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i64 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i64 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i16 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i16 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i32 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i32 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i64 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i64 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f32_v2i8 = fptosi <2 x float> undef to <2 x i8>
   %v2f64_v2i8 = fptosi <2 x double> undef to <2 x i8>
@@ -3022,288 +3480,571 @@ define void @fptosi() {
 }
 
 define void @fptoui() {
-; CHECK-LABEL: 'fptoui'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptoui <2 x float> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptoui <2 x double> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptoui <2 x float> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptoui <2 x double> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptoui <2 x float> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptoui <2 x double> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptoui <2 x float> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptoui <2 x double> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptoui <4 x float> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptoui <4 x double> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptoui <4 x float> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptoui <4 x double> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i64 = fptoui <4 x float> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i64 = fptoui <4 x double> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptoui <8 x float> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i16 = fptoui <8 x double> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i32 = fptoui <8 x float> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i32 = fptoui <8 x double> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8i64 = fptoui <8 x float> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i64 = fptoui <8 x double> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i16 = fptoui <16 x float> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16i16 = fptoui <16 x double> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32_v16i32 = fptoui <16 x float> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16i32 = fptoui <16 x double> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16i64 = fptoui <16 x float> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64_v16i64 = fptoui <16 x double> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32i16 = fptoui <32 x float> undef to <32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32i16 = fptoui <32 x double> undef to <32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f32_v32i32 = fptoui <32 x float> undef to <32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32i32 = fptoui <32 x double> undef to <32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32i64 = fptoui <32 x float> undef to <32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptoui <32 x double> undef to <32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64i16 = fptoui <64 x float> undef to <64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64i16 = fptoui <64 x double> undef to <64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptoui <64 x float> undef to <64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64i32 = fptoui <64 x double> undef to <64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64i64 = fptoui <64 x float> undef to <64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptoui <64 x double> undef to <64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128i16 = fptoui <128 x float> undef to <128 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128i16 = fptoui <128 x double> undef to <128 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptoui <128 x float> undef to <128 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128i32 = fptoui <128 x double> undef to <128 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128i64 = fptoui <128 x float> undef to <128 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptoui <128 x double> undef to <128 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4i64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f64_nxv4i1 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8f64_nxv8i8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8i16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f32_nxv8i32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8i32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8i64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f64_nxv8i64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f32_nxv8i1 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f64_nxv8i1 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16f32_nxv16i8 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16f64_nxv16i8 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16i16 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16i16 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f32_nxv16i32 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16i32 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16i64 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16f32_nxv16i1 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f64_nxv16i1 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32f32_nxv32i8 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32f64_nxv32i8 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32i16 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32i16 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32i32 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32i64 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32f32_nxv32i1 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f64_nxv32i1 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64f32_nxv64i8 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %nxv64f64_nxv64i8 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64i16 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64i16 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64i32 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64i64 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64f64_nxv64i64 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64f32_nxv64i1 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64f64_nxv64i1 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'fptoui'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptoui <2 x float> undef to <2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptoui <2 x double> undef to <2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptoui <2 x float> undef to <2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptoui <2 x double> undef to <2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptoui <2 x float> undef to <2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptoui <2 x double> undef to <2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptoui <2 x float> undef to <2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptoui <2 x double> undef to <2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptoui <4 x float> undef to <4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptoui <4 x double> undef to <4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptoui <4 x float> undef to <4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptoui <4 x double> undef to <4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i64 = fptoui <4 x float> undef to <4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i64 = fptoui <4 x double> undef to <4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptoui <8 x float> undef to <8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i16 = fptoui <8 x double> undef to <8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i32 = fptoui <8 x float> undef to <8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i32 = fptoui <8 x double> undef to <8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8i64 = fptoui <8 x float> undef to <8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64_v8i64 = fptoui <8 x double> undef to <8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i16 = fptoui <16 x float> undef to <16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16i16 = fptoui <16 x double> undef to <16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32_v16i32 = fptoui <16 x float> undef to <16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16i32 = fptoui <16 x double> undef to <16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16i64 = fptoui <16 x float> undef to <16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64_v16i64 = fptoui <16 x double> undef to <16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32i16 = fptoui <32 x float> undef to <32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32i16 = fptoui <32 x double> undef to <32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f32_v32i32 = fptoui <32 x float> undef to <32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32i32 = fptoui <32 x double> undef to <32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32i64 = fptoui <32 x float> undef to <32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptoui <32 x double> undef to <32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64i16 = fptoui <64 x float> undef to <64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64i16 = fptoui <64 x double> undef to <64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptoui <64 x float> undef to <64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64i32 = fptoui <64 x double> undef to <64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64i64 = fptoui <64 x float> undef to <64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptoui <64 x double> undef to <64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128i16 = fptoui <128 x float> undef to <128 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128i16 = fptoui <128 x double> undef to <128 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptoui <128 x float> undef to <128 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128i32 = fptoui <128 x double> undef to <128 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128i64 = fptoui <128 x float> undef to <128 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptoui <128 x double> undef to <128 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4i64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f64_nxv4i64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f64_nxv4i1 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8f64_nxv8i8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8i16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f32_nxv8i32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8i32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8i64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f64_nxv8i64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f32_nxv8i1 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f64_nxv8i1 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16f32_nxv16i8 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16f64_nxv16i8 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16i16 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16i16 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f32_nxv16i32 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16i32 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16i64 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16f32_nxv16i1 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f64_nxv16i1 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32f32_nxv32i8 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32f64_nxv32i8 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32i16 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32i16 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32i32 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32i64 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32f32_nxv32i1 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f64_nxv32i1 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64f32_nxv64i8 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %nxv64f64_nxv64i8 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64i16 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64i16 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64i32 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64i64 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64f64_nxv64i64 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i64>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64f32_nxv64i1 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64f64_nxv64i1 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'fptoui'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f32_v2i8 = fptoui <2 x float> undef to <2 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f64_v2i8 = fptoui <2 x double> undef to <2 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f32_v2i16 = fptoui <2 x float> undef to <2 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f64_v2i16 = fptoui <2 x double> undef to <2 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f32_v2i32 = fptoui <2 x float> undef to <2 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2f64_v2i32 = fptoui <2 x double> undef to <2 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i64 = fptoui <2 x float> undef to <2 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i64 = fptoui <2 x double> undef to <2 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f32_v4i16 = fptoui <4 x float> undef to <4 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f64_v4i16 = fptoui <4 x double> undef to <4 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f32_v4i32 = fptoui <4 x float> undef to <4 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v4f64_v4i32 = fptoui <4 x double> undef to <4 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f32_v4i64 = fptoui <4 x float> undef to <4 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f64_v4i64 = fptoui <4 x double> undef to <4 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f32_v8i16 = fptoui <8 x float> undef to <8 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f64_v8i16 = fptoui <8 x double> undef to <8 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f32_v8i32 = fptoui <8 x float> undef to <8 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v8f64_v8i32 = fptoui <8 x double> undef to <8 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8f32_v8i64 = fptoui <8 x float> undef to <8 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8f64_v8i64 = fptoui <8 x double> undef to <8 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f32_v16i16 = fptoui <16 x float> undef to <16 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f64_v16i16 = fptoui <16 x double> undef to <16 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f32_v16i32 = fptoui <16 x float> undef to <16 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v16f64_v16i32 = fptoui <16 x double> undef to <16 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16f32_v16i64 = fptoui <16 x float> undef to <16 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16f64_v16i64 = fptoui <16 x double> undef to <16 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f32_v32i16 = fptoui <32 x float> undef to <32 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f64_v32i16 = fptoui <32 x double> undef to <32 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f32_v32i32 = fptoui <32 x float> undef to <32 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32f64_v32i32 = fptoui <32 x double> undef to <32 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32f32_v32i64 = fptoui <32 x float> undef to <32 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32f64_v32i64 = fptoui <32 x double> undef to <32 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 319 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f32_v64i16 = fptoui <64 x float> undef to <64 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v64f64_v64i16 = fptoui <64 x double> undef to <64 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v64f32_v64i32 = fptoui <64 x float> undef to <64 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v64f64_v64i32 = fptoui <64 x double> undef to <64 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64f32_v64i64 = fptoui <64 x float> undef to <64 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64f64_v64i64 = fptoui <64 x double> undef to <64 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 639 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %v128f32_v128i16 = fptoui <128 x float> undef to <128 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %v128f64_v128i16 = fptoui <128 x double> undef to <128 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %v128f32_v128i32 = fptoui <128 x float> undef to <128 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %v128f64_v128i32 = fptoui <128 x double> undef to <128 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128f32_v128i64 = fptoui <128 x float> undef to <128 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128f64_v128i64 = fptoui <128 x double> undef to <128 x i64>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 511 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1279 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i32 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i64 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f32_nxv1i1 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1f64_nxv1i1 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f32_nxv2i1 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2f64_nxv2i1 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f32_nxv4i1 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4f64_nxv4i1 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f32_nxv8i1 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8f64_nxv8i1 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i8 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i8 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i16 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i16 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i32 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i32 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i64 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i64 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f32_nxv16i1 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16f64_nxv16i1 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i8 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i8 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i16 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i16 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i32 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i32 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i64 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i64 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f32_nxv32i1 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32f64_nxv32i1 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i8 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i8 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i8>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i16 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i16 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i16>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i32 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i32 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i32>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i64 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i64 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i64>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f32_nxv64i1 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64f64_nxv64i1 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i1>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f32_v2i8 = fptoui <2 x float> undef to <2 x i8>
   %v2f64_v2i8 = fptoui <2 x double> undef to <2 x i8>
@@ -3617,288 +4358,571 @@ define void @fptoui() {
 }
 
 define void @sitofp() {
-; CHECK-LABEL: 'sitofp'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i16_v16f64 = sitofp <16 x i16> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16f64 = sitofp <16 x i32> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64_v16f64 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32f32 = sitofp <32 x i16> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i16_v32f64 = sitofp <32 x i16> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32f32 = sitofp <32 x i32> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32f64 = sitofp <32 x i32> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i64_v32f32 = sitofp <32 x i64> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32f64 = sitofp <32 x i64> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64f32 = sitofp <64 x i16> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i16_v64f64 = sitofp <64 x i16> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i32_v64f32 = sitofp <64 x i32> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64f64 = sitofp <64 x i32> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64f32 = sitofp <64 x i64> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i64_v64f64 = sitofp <64 x i64> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128f32 = sitofp <128 x i16> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128i16_v128f64 = sitofp <128 x i16> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v128i32_v128f32 = sitofp <128 x i32> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128f64 = sitofp <128 x i32> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128f32 = sitofp <128 x i64> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v128i64_v128f64 = sitofp <128 x i64> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f64 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i16_nxv2f64 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2f64 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f32 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i16_nxv4f64 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4f32 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4f64 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f32 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4f64 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8f32 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i16_nxv8f64 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32_nxv8f32 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8f64 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8f32 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64_nxv8f64 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16f32 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i16_nxv16f64 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32_nxv16f32 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16f64 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16f32 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i64_nxv16f64 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32f32 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i16_nxv32f64 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32f64 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32f32 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i64_nxv32f64 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64f32 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64i16_nxv64f64 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64f64 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64i64_nxv64f32 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64i64_nxv64f64 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'sitofp'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i16_v16f64 = sitofp <16 x i16> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16f64 = sitofp <16 x i32> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64_v16f64 = sitofp <16 x i64> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32f32 = sitofp <32 x i16> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i16_v32f64 = sitofp <32 x i16> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32f32 = sitofp <32 x i32> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32f64 = sitofp <32 x i32> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i64_v32f32 = sitofp <32 x i64> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32f64 = sitofp <32 x i64> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64f32 = sitofp <64 x i16> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i16_v64f64 = sitofp <64 x i16> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i32_v64f32 = sitofp <64 x i32> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64f64 = sitofp <64 x i32> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64f32 = sitofp <64 x i64> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i64_v64f64 = sitofp <64 x i64> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128f32 = sitofp <128 x i16> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128i16_v128f64 = sitofp <128 x i16> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v128i32_v128f32 = sitofp <128 x i32> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128f64 = sitofp <128 x i32> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128f32 = sitofp <128 x i64> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v128i64_v128f64 = sitofp <128 x i64> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f64 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i16_nxv2f64 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2f64 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f32 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i16_nxv4f64 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4f32 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4f64 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f32 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4f64 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8f32 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i16_nxv8f64 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32_nxv8f32 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8f64 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8f32 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64_nxv8f64 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16f32 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i16_nxv16f64 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32_nxv16f32 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16f64 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16f32 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i64_nxv16f64 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32f32 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i16_nxv32f64 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32f64 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32f32 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i64_nxv32f64 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64f32 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64i16_nxv64f64 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64f64 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64i64_nxv64f32 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64i64_nxv64f64 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'sitofp'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i16_v16f64 = sitofp <16 x i16> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i32_v16f64 = sitofp <16 x i32> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i64_v16f64 = sitofp <16 x i64> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i16_v32f32 = sitofp <32 x i16> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i16_v32f64 = sitofp <32 x i16> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i32_v32f32 = sitofp <32 x i32> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i32_v32f64 = sitofp <32 x i32> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32i64_v32f32 = sitofp <32 x i64> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32i64_v32f64 = sitofp <32 x i64> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i16_v64f32 = sitofp <64 x i16> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i16_v64f64 = sitofp <64 x i16> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v64i32_v64f32 = sitofp <64 x i32> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v64i32_v64f64 = sitofp <64 x i32> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64i64_v64f32 = sitofp <64 x i64> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64i64_v64f64 = sitofp <64 x i64> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v128i16_v128f32 = sitofp <128 x i16> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v128i16_v128f64 = sitofp <128 x i16> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %v128i32_v128f32 = sitofp <128 x i32> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %v128i32_v128f64 = sitofp <128 x i32> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128i64_v128f32 = sitofp <128 x i64> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128i64_v128f64 = sitofp <128 x i64> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i16_nxv1f32 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i16_nxv1f64 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i32_nxv1f32 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i32_nxv1f64 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i64_nxv1f32 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i64_nxv1f64 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i16_nxv2f32 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i16_nxv2f64 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i32_nxv2f32 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i32_nxv2f64 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i64_nxv2f32 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i64_nxv2f64 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i16_nxv4f32 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i16_nxv4f64 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i32_nxv4f32 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i32_nxv4f64 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i64_nxv4f32 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i64_nxv4f64 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i16_nxv8f32 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i16_nxv8f64 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i32_nxv8f32 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i32_nxv8f64 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i64_nxv8f32 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i64_nxv8f64 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i16_nxv16f32 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i16_nxv16f64 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i32_nxv16f32 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i32_nxv16f64 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i64_nxv16f32 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i64_nxv16f64 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i16_nxv32f32 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i16_nxv32f64 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i32_nxv32f32 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i32_nxv32f64 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i64_nxv32f32 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i64_nxv32f64 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i16_nxv64f32 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i16_nxv64f64 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i32_nxv64f32 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i32_nxv64f64 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64f32 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64f64 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
   %v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
@@ -4212,288 +5236,571 @@ define void @sitofp() {
 }
 
 define void @uitofp() {
-; CHECK-LABEL: 'uitofp'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i16_v16f64 = uitofp <16 x i16> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16f64 = uitofp <16 x i32> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64_v16f64 = uitofp <16 x i64> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32f32 = uitofp <32 x i16> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i16_v32f64 = uitofp <32 x i16> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32f32 = uitofp <32 x i32> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32f64 = uitofp <32 x i32> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i64_v32f32 = uitofp <32 x i64> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32f64 = uitofp <32 x i64> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64f32 = uitofp <64 x i16> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i16_v64f64 = uitofp <64 x i16> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i32_v64f32 = uitofp <64 x i32> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64f64 = uitofp <64 x i32> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64f32 = uitofp <64 x i64> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i64_v64f64 = uitofp <64 x i64> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128f32 = uitofp <128 x i16> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128i16_v128f64 = uitofp <128 x i16> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v128i32_v128f32 = uitofp <128 x i32> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128f64 = uitofp <128 x i32> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128f32 = uitofp <128 x i64> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v128i64_v128f64 = uitofp <128 x i64> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f64 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i8_nxv2f64 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i16_nxv2f64 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2f64 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2f64 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i8_nxv4f32 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i8_nxv4f64 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f32 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i16_nxv4f64 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4f32 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4f64 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f32 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4f64 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4f32 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4f64 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i8_nxv8f32 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i8_nxv8f64 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8f32 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i16_nxv8f64 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32_nxv8f32 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8f64 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8f32 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64_nxv8f64 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8f32 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8f64 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16i8_nxv16f32 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i8_nxv16f64 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16f32 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i16_nxv16f64 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32_nxv16f32 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16f64 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16f32 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i64_nxv16f64 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16f32 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16f64 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32i8_nxv32f32 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i8_nxv32f64 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32f32 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i16_nxv32f64 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32f64 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32f32 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i64_nxv32f64 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32f32 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32f64 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64i8_nxv64f32 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64i8_nxv64f64 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64f32 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64i16_nxv64f64 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64f64 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64i64_nxv64f32 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64i64_nxv64f64 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64f32 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64f64 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'uitofp'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i16_v16f64 = uitofp <16 x i16> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32_v16f64 = uitofp <16 x i32> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64_v16f64 = uitofp <16 x i64> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i16_v32f32 = uitofp <32 x i16> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32i16_v32f64 = uitofp <32 x i16> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32f32 = uitofp <32 x i32> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i32_v32f64 = uitofp <32 x i32> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i64_v32f32 = uitofp <32 x i64> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32f64 = uitofp <32 x i64> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i16_v64f32 = uitofp <64 x i16> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64i16_v64f64 = uitofp <64 x i16> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i32_v64f32 = uitofp <64 x i32> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64i32_v64f64 = uitofp <64 x i32> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64f32 = uitofp <64 x i64> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i64_v64f64 = uitofp <64 x i64> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128i16_v128f32 = uitofp <128 x i16> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128i16_v128f64 = uitofp <128 x i16> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v128i32_v128f32 = uitofp <128 x i32> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128i32_v128f64 = uitofp <128 x i32> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128f32 = uitofp <128 x i64> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v128i64_v128f64 = uitofp <128 x i64> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f64 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i8_nxv2f64 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i16_nxv2f64 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2f64 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2f64 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i8_nxv4f32 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i8_nxv4f64 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f32 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i16_nxv4f64 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4f32 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4f64 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f32 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4f64 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4f32 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4f64 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i8_nxv8f32 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i8_nxv8f64 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8f32 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8i16_nxv8f64 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32_nxv8f32 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8f64 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8f32 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64_nxv8f64 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8f32 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8f64 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv16i8_nxv16f32 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i8_nxv16f64 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16f32 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16i16_nxv16f64 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32_nxv16f32 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i32_nxv16f64 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16f32 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i64_nxv16f64 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16f32 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16f64 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv32i8_nxv32f32 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i8_nxv32f64 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i16_nxv32f32 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32i16_nxv32f64 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32i32_nxv32f64 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32f32 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i64_nxv32f64 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32f32 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32f64 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv64i8_nxv64f32 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %nxv64i8_nxv64f64 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64i16_nxv64f32 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64i16_nxv64f64 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64i32_nxv64f64 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64i64_nxv64f32 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv64i64_nxv64f64 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64f32 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64f64 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'uitofp'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.uitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.uitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i16_v16f64 = uitofp <16 x i16> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i32_v16f64 = uitofp <16 x i32> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i64_v16f64 = uitofp <16 x i64> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.uitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i16_v32f32 = uitofp <32 x i16> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i16_v32f64 = uitofp <32 x i16> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i32_v32f32 = uitofp <32 x i32> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i32_v32f64 = uitofp <32 x i32> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32i64_v32f32 = uitofp <32 x i64> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v32i64_v32f64 = uitofp <32 x i64> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.uitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.uitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i16_v64f32 = uitofp <64 x i16> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i16_v64f64 = uitofp <64 x i16> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v64i32_v64f32 = uitofp <64 x i32> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v64i32_v64f64 = uitofp <64 x i32> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64i64_v64f32 = uitofp <64 x i64> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v64i64_v64f64 = uitofp <64 x i64> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.uitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.uitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v128i16_v128f32 = uitofp <128 x i16> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %v128i16_v128f64 = uitofp <128 x i16> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %v128i32_v128f32 = uitofp <128 x i32> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %v128i32_v128f64 = uitofp <128 x i32> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128i64_v128f32 = uitofp <128 x i64> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %v128i64_v128f64 = uitofp <128 x i64> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 248 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.uitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.uitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i16_nxv1f32 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i16_nxv1f64 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i32_nxv1f32 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i32_nxv1f64 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i64_nxv1f32 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i64_nxv1f64 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i1_nxv1f32 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv1i1_nxv1f64 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.uitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i8_nxv2f32 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i8_nxv2f64 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i16_nxv2f32 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i16_nxv2f64 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i32_nxv2f32 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i32_nxv2f64 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i64_nxv2f32 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i64_nxv2f64 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i1_nxv2f32 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv2i1_nxv2f64 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.uitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i8_nxv4f32 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i8_nxv4f64 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i16_nxv4f32 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i16_nxv4f64 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i32_nxv4f32 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i32_nxv4f64 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i64_nxv4f32 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i64_nxv4f64 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i1_nxv4f32 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv4i1_nxv4f64 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i8_nxv8f32 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i8_nxv8f64 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i16_nxv8f32 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i16_nxv8f64 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i32_nxv8f32 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i32_nxv8f64 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i64_nxv8f32 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i64_nxv8f64 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i1_nxv8f32 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv8i1_nxv8f64 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 8 x float> @llvm.vp.uitofp.nxv8f32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i8_nxv16f32 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i8_nxv16f64 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i16_nxv16f32 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i16_nxv16f64 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i32_nxv16f32 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i32_nxv16f64 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i64_nxv16f32 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i64_nxv16f64 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i1_nxv16f32 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv16i1_nxv16f64 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.uitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.uitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i8_nxv32f32 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i8_nxv32f64 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i16_nxv32f32 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i16_nxv32f64 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i32_nxv32f32 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i32_nxv32f64 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i64_nxv32f32 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i64_nxv32f64 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i1_nxv32f32 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv32i1_nxv32f64 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.uitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.uitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i8_nxv64f32 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i8_nxv64f64 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i16_nxv64f32 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i16_nxv64f64 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i32_nxv64f32 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i32_nxv64f64 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64f32 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64f64 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i1_nxv64f32 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i1_nxv64f64 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.uitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.uitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
   %v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
@@ -4807,32 +6114,59 @@ define void @uitofp() {
 }
 
 define void @oddvec_sizes() {
-; CHECK-LABEL: 'oddvec_sizes'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext <3 x i8> undef to <3 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <7 x i8> undef to <7 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = sext <15 x i8> undef to <15 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = zext <3 x i8> undef to <3 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = zext <7 x i8> undef to <7 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = zext <15 x i8> undef to <15 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <3 x i32> undef to <3 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <7 x i32> undef to <7 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = trunc <15 x i32> undef to <15 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %10 = bitcast <3 x i32> undef to <3 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %11 = bitcast <7 x i32> undef to <7 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %12 = bitcast <15 x i32> undef to <15 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = sitofp <3 x i32> undef to <3 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = sitofp <7 x i32> undef to <7 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = sitofp <15 x i32> undef to <15 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = uitofp <3 x i32> undef to <3 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = uitofp <7 x i32> undef to <7 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = uitofp <15 x i32> undef to <15 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = fptosi <3 x float> undef to <3 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = fptosi <7 x float> undef to <7 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = fptosi <15 x float> undef to <15 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = fptoui <3 x float> undef to <3 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = fptoui <7 x float> undef to <7 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = fptoui <15 x float> undef to <15 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'oddvec_sizes'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext <3 x i8> undef to <3 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <7 x i8> undef to <7 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = sext <15 x i8> undef to <15 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = zext <3 x i8> undef to <3 x i16>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = zext <7 x i8> undef to <7 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = zext <15 x i8> undef to <15 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <3 x i32> undef to <3 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <7 x i32> undef to <7 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = trunc <15 x i32> undef to <15 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %10 = bitcast <3 x i32> undef to <3 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %11 = bitcast <7 x i32> undef to <7 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %12 = bitcast <15 x i32> undef to <15 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = sitofp <3 x i32> undef to <3 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = sitofp <7 x i32> undef to <7 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = sitofp <15 x i32> undef to <15 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = uitofp <3 x i32> undef to <3 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = uitofp <7 x i32> undef to <7 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = uitofp <15 x i32> undef to <15 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = fptosi <3 x float> undef to <3 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = fptosi <7 x float> undef to <7 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = fptosi <15 x float> undef to <15 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = fptoui <3 x float> undef to <3 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = fptoui <7 x float> undef to <7 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = fptoui <15 x float> undef to <15 x i32>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'oddvec_sizes'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext <3 x i8> undef to <3 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <7 x i8> undef to <7 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = sext <15 x i8> undef to <15 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = zext <3 x i8> undef to <3 x i16>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = zext <7 x i8> undef to <7 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = zext <15 x i8> undef to <15 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <3 x i32> undef to <3 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <7 x i32> undef to <7 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = trunc <15 x i32> undef to <15 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = bitcast <3 x i32> undef to <3 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = bitcast <7 x i32> undef to <7 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = bitcast <15 x i32> undef to <15 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %13 = sitofp <3 x i32> undef to <3 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = sitofp <7 x i32> undef to <7 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %15 = sitofp <15 x i32> undef to <15 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %16 = uitofp <3 x i32> undef to <3 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = uitofp <7 x i32> undef to <7 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %18 = uitofp <15 x i32> undef to <15 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %19 = fptosi <3 x float> undef to <3 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %20 = fptosi <7 x float> undef to <7 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %21 = fptosi <15 x float> undef to <15 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %22 = fptoui <3 x float> undef to <3 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %23 = fptoui <7 x float> undef to <7 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %24 = fptoui <15 x float> undef to <15 x i32>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   sext <3 x i8> undef to <3 x i16>
   sext <7 x i8> undef to <7 x i32>
@@ -4873,16 +6207,27 @@ define void @oddvec_sizes() {
 ; vector splitting.  We previously crashed on this case due to an
 ; infinite recursion between cast costing and scalarization costing.
 define void @legalization_crash() {
-; CHECK-LABEL: 'legalization_crash'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16700 for instruction: %1 = bitcast <24 x i8> undef to <192 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %2 = trunc <192 x i8> undef to <192 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %3 = zext <192 x i1> undef to <192 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %4 = sext <192 x i1> undef to <192 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 948 for instruction: %5 = sitofp <192 x i1> undef to <192 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 948 for instruction: %6 = uitofp <192 x i1> undef to <192 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16892 for instruction: %7 = fptosi <192 x float> undef to <192 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16892 for instruction: %8 = fptoui <192 x float> undef to <192 x i1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVE64D-LABEL: 'legalization_crash'
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16700 for instruction: %1 = bitcast <24 x i8> undef to <192 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %2 = trunc <192 x i8> undef to <192 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %3 = zext <192 x i1> undef to <192 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %4 = sext <192 x i1> undef to <192 x i8>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 948 for instruction: %5 = sitofp <192 x i1> undef to <192 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 948 for instruction: %6 = uitofp <192 x i1> undef to <192 x float>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16892 for instruction: %7 = fptosi <192 x float> undef to <192 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 16892 for instruction: %8 = fptoui <192 x float> undef to <192 x i1>
+; ZVE64D-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVE64X-LABEL: 'legalization_crash'
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16700 for instruction: %1 = bitcast <24 x i8> undef to <192 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %2 = trunc <192 x i8> undef to <192 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %3 = zext <192 x i1> undef to <192 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %4 = sext <192 x i1> undef to <192 x i8>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %5 = sitofp <192 x i1> undef to <192 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %6 = uitofp <192 x i1> undef to <192 x float>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16892 for instruction: %7 = fptosi <192 x float> undef to <192 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 16892 for instruction: %8 = fptoui <192 x float> undef to <192 x i1>
+; ZVE64X-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   bitcast <24 x i8> undef to <192 x i1>
   trunc <192 x i8> undef to <192 x i1>
@@ -4894,6 +6239,3 @@ define void @legalization_crash() {
   fptoui <192 x float> undef to <192 x i1>
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}
diff --git a/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll b/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll
index ecc7fc8..245e8f7 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/intrinsic-cost-crash.ll
@@ -23,7 +23,7 @@
 %"class.llvm::Metadata.306.1758.9986.10470.10954.11438.11922.12406.12890.13374.13858.15310.15794.16278.17730.19182.21118.25958.26926.29346.29830.30314.30798.31282.31766.32250.32734.33702.36606.38058.41638" = type { i8, i8, i16, i32 }
 
 ; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end(i64, ptr nocapture) #0
+declare void @llvm.lifetime.end(ptr nocapture) #0
 
 ; Function Attrs: nounwind ssp uwtable
 define hidden void @fun(ptr %N, i1 %arg) #1 align 2 {
@@ -42,7 +42,6 @@ for.cond.cleanup:                                 ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %indvars.iv190 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next191, %for.body ]
-  call void @llvm.lifetime.end(i64 16, ptr nonnull null)
   %indvars.iv.next191 = add nuw nsw i64 %indvars.iv190, 1
   %exitcond193 = icmp eq i64 %indvars.iv.next191, %wide.trip.count192
   br i1 %exitcond193, label %for.cond.cleanup, label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/free-intrinsics.ll b/llvm/test/Analysis/CostModel/X86/free-intrinsics.ll
index 3a54428..cef960d 100644
--- a/llvm/test/Analysis/CostModel/X86/free-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/X86/free-intrinsics.ll
@@ -14,8 +14,8 @@ define i32 @trivially_free() {
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(ptr %alloca)
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(ptr %alloca)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
@@ -32,8 +32,8 @@ define i32 @trivially_free() {
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(ptr %alloca)
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(ptr %alloca)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
@@ -49,8 +49,8 @@ define i32 @trivially_free() {
   %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
   %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
   %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  call void @llvm.lifetime.end.p0(ptr %alloca)
   %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 1, i1 1, i1 1)
   %a6 = call ptr @llvm.ptr.annotation.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
   call void @llvm.var.annotation(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
@@ -66,8 +66,8 @@ declare void @llvm.invariant.end.p0(ptr, i64, ptr)
 declare ptr @llvm.launder.invariant.group.p0(ptr)
 declare ptr @llvm.strip.invariant.group.p0(ptr)
 declare i1 @llvm.is.constant.i32(i32)
-declare void @llvm.lifetime.start.p0(i64, ptr)
-declare void @llvm.lifetime.end.p0(i64, ptr)
+declare void @llvm.lifetime.start.p0(ptr)
+declare void @llvm.lifetime.end.p0(ptr)
 declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
 declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)
 declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr)
diff --git a/llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll b/llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll
index 96064dc..2acc8e8 100644
--- a/llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll
+++ b/llvm/test/Analysis/CostModel/free-intrinsics-datalayout.ll
@@ -16,8 +16,8 @@ define i32 @trivially_free() {
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(ptr %alloca)
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(ptr %alloca)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a7 = call i1 @llvm.allow.ubsan.check(i8 123)
@@ -36,8 +36,8 @@ define i32 @trivially_free() {
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(ptr %alloca)
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(ptr %alloca)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a7 = call i1 @llvm.allow.ubsan.check(i8 123)
@@ -55,8 +55,8 @@ define i32 @trivially_free() {
   %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
   %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
   %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  call void @llvm.lifetime.end.p0(ptr %alloca)
   %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 1, i1 1, i1 1)
   %a6 = call ptr @llvm.ptr.annotation.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
   %a7 = call i1 @llvm.allow.ubsan.check(i8 123)
@@ -74,8 +74,8 @@ declare void @llvm.invariant.end.p0(ptr, i64, ptr)
 declare ptr @llvm.launder.invariant.group.p0(ptr)
 declare ptr @llvm.strip.invariant.group.p0(ptr)
 declare i1 @llvm.is.constant.i32(i32)
-declare void @llvm.lifetime.start.p0(i64, ptr)
-declare void @llvm.lifetime.end.p0(i64, ptr)
+declare void @llvm.lifetime.start.p0(ptr)
+declare void @llvm.lifetime.end.p0(ptr)
 declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
 declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)
 declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr)
diff --git a/llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll b/llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll
index f989ebe..7f002d0 100644
--- a/llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll
+++ b/llvm/test/Analysis/CostModel/free-intrinsics-no_info.ll
@@ -14,8 +14,8 @@ define i32 @trivially_free() {
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(ptr %alloca)
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(ptr %alloca)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
@@ -34,8 +34,8 @@ define i32 @trivially_free() {
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(ptr %alloca)
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(ptr %alloca)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
@@ -53,8 +53,8 @@ define i32 @trivially_free() {
   %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
   %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
   %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloca)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloca)
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  call void @llvm.lifetime.end.p0(ptr %alloca)
   %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 1, i1 1, i1 1)
   %a6 = call ptr @llvm.ptr.annotation.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
   call void @llvm.var.annotation(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
@@ -72,8 +72,8 @@ declare void @llvm.invariant.end.p0(ptr, i64, ptr)
 declare ptr @llvm.launder.invariant.group.p0(ptr)
 declare ptr @llvm.strip.invariant.group.p0(ptr)
 declare i1 @llvm.is.constant.i32(i32)
-declare void @llvm.lifetime.start.p0(i64, ptr)
-declare void @llvm.lifetime.end.p0(i64, ptr)
+declare void @llvm.lifetime.start.p0(ptr)
+declare void @llvm.lifetime.end.p0(ptr)
 declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
 declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)
 declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr)
diff --git a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
index 2623e6f..aeeb21e 100644
--- a/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
+++ b/llvm/test/Analysis/DXILResource/buffer-frombinding.ll
@@ -15,7 +15,7 @@
 define void @test_typedbuffer() {
   ; ByteAddressBuffer Buf : register(t8, space1)
   %srv0 = call target("dx.RawBuffer", void, 0, 0)
-      @llvm.dx.resource.handlefrombinding(i32 1, i32 8, i32 1, i32 0, i1 false, ptr @Zero.str)
+      @llvm.dx.resource.handlefrombinding(i32 1, i32 8, i32 1, i32 0, ptr @Zero.str)
   ; CHECK: Resource [[SRV0:[0-9]+]]:
   ; CHECK:   Name: Zero
   ; CHECK:   Binding:
@@ -29,7 +29,7 @@ define void @test_typedbuffer() {
   ; struct S { float4 a; uint4 b; };
   ; StructuredBuffer<S> Buf : register(t2, space4)
   %srv1 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0)
-      @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 1, i32 0, i1 false, ptr @One.str)
+      @llvm.dx.resource.handlefrombinding(i32 4, i32 2, i32 1, i32 0, ptr @One.str)
   ; CHECK: Resource [[SRV1:[0-9]+]]:
   ; CHECK:   Name: One
   ; CHECK:   Binding:
@@ -44,7 +44,7 @@ define void @test_typedbuffer() {
 
   ; Buffer<uint4> Buf[24] : register(t3, space5)
   %srv2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
-      @llvm.dx.resource.handlefrombinding(i32 5, i32 3, i32 24, i32 0, i1 false, ptr @Two.str)
+      @llvm.dx.resource.handlefrombinding(i32 5, i32 3, i32 24, i32 0, ptr @Two.str)
   ; CHECK: Resource [[SRV2:[0-9]+]]:
   ; CHECK:   Name: Two
   ; CHECK:   Binding:
@@ -59,7 +59,7 @@ define void @test_typedbuffer() {
 
   ; RWBuffer<int> Buf : register(u7, space2)
   %uav0 = call target("dx.TypedBuffer", i32, 1, 0, 1)
-      @llvm.dx.resource.handlefrombinding(i32 2, i32 7, i32 1, i32 0, i1 false, ptr @Three.str)
+      @llvm.dx.resource.handlefrombinding(i32 2, i32 7, i32 1, i32 0, ptr @Three.str)
   ; CHECK: Resource [[UAV0:[0-9]+]]:
   ; CHECK:   Name: Three
   ; CHECK:   Binding:
@@ -77,7 +77,7 @@ define void @test_typedbuffer() {
 
   ; RWBuffer<float4> Buf : register(u5, space3)
   %uav1 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
-      @llvm.dx.resource.handlefrombinding(i32 3, i32 5, i32 1, i32 0, i1 false, ptr @Four.str)
+      @llvm.dx.resource.handlefrombinding(i32 3, i32 5, i32 1, i32 0, ptr @Four.str)
   call i32 @llvm.dx.resource.updatecounter(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %uav1, i8 -1)
   ; CHECK: Resource [[UAV1:[0-9]+]]:
   ; CHECK:   Name: Four
@@ -97,10 +97,10 @@ define void @test_typedbuffer() {
   ; RWBuffer<float4> BufferArray[10] : register(u0, space4)
   ; RWBuffer<float4> Buf = BufferArray[0]
   %uav2_1 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
-        @llvm.dx.resource.handlefrombinding(i32 4, i32 0, i32 10, i32 0, i1 false, ptr @Array.str)
+        @llvm.dx.resource.handlefrombinding(i32 4, i32 0, i32 10, i32 0, ptr @Array.str)
   ; RWBuffer<float4> Buf = BufferArray[5]
   %uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
-        @llvm.dx.resource.handlefrombinding(i32 4, i32 0, i32 10, i32 5, i1 false, ptr @Array.str)
+        @llvm.dx.resource.handlefrombinding(i32 4, i32 0, i32 10, i32 5, ptr @Array.str)
   call i32 @llvm.dx.resource.updatecounter(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %uav2_2, i8 1)
   ; CHECK: Resource [[UAV2:[0-9]+]]:
   ; CHECK:   Name: Array
@@ -119,7 +119,7 @@ define void @test_typedbuffer() {
 
   ; RWBuffer<float4> Buf : register(u0, space5)
   %uav3 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
-      @llvm.dx.resource.handlefrombinding(i32 5, i32 0, i32 1, i32 0, i1 false, ptr @Five.str)
+      @llvm.dx.resource.handlefrombinding(i32 5, i32 0, i32 1, i32 0, ptr @Five.str)
   call i32 @llvm.dx.resource.updatecounter(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %uav3, i8 -1)
   call i32 @llvm.dx.resource.updatecounter(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %uav3, i8 1)
   ; CHECK: Resource [[UAV3:[0-9]+]]:
@@ -138,7 +138,7 @@ define void @test_typedbuffer() {
   ; CHECK:   Element Count: 4
 
   %cb0 = call target("dx.CBuffer", {float})
-     @llvm.dx.resource.handlefrombinding(i32 1, i32 0, i32 1, i32 0, i1 false, ptr @CB.str)
+     @llvm.dx.resource.handlefrombinding(i32 1, i32 0, i32 1, i32 0, ptr @CB.str)
   ; CHECK: Resource [[CB0:[0-9]+]]:
   ; CHECK:   Name: CB
   ; CHECK:   Binding:
@@ -146,12 +146,12 @@ define void @test_typedbuffer() {
   ; CHECK:     Space: 1
   ; CHECK:     Lower Bound: 0
   ; CHECK:     Size: 1
-  ; CHECK:   Class: CBuffer
+  ; CHECK:   Class: CBV
   ; CHECK:   Kind: CBuffer
   ; CHECK:   CBuffer size: 4
 
   %cb1 = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
-     @llvm.dx.resource.handlefrombinding(i32 1, i32 8, i32 1, i32 0, i1 false, ptr @Constants.str)
+     @llvm.dx.resource.handlefrombinding(i32 1, i32 8, i32 1, i32 0, ptr @Constants.str)
   ; CHECK: Resource [[CB1:[0-9]+]]:
   ; CHECK:   Name: Constants
   ; CHECK:   Binding:
@@ -159,7 +159,7 @@ define void @test_typedbuffer() {
   ; CHECK:     Space: 1
   ; CHECK:     Lower Bound: 8
   ; CHECK:     Size: 1
-  ; CHECK:   Class: CBuffer
+  ; CHECK:   Class: CBV
   ; CHECK:   Kind: CBuffer
   ; CHECK:   CBuffer size: 4
 
diff --git a/llvm/test/Analysis/Delinearization/a.ll b/llvm/test/Analysis/Delinearization/a.ll
index 02ba3fd..755c9ba 100644
--- a/llvm/test/Analysis/Delinearization/a.ll
+++ b/llvm/test/Analysis/Delinearization/a.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 ;
 ; void foo(long n, long m, long o, int A[n][m][o]) {
@@ -7,12 +8,15 @@
 ;         A[2*i+3][3*j-4][5*k+7] = 1;
 ; }
 
-; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
-; CHECK: ArrayRef[{3,+,2}<nuw><%for.i>][{-4,+,3}<nw><%for.j>][{7,+,5}<nw><%for.k>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, ptr nocapture %A) #0 {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store i32 1, ptr %arrayidx11.us.us, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}(28 + (4 * (-4 + (3 * %m)) * %o)),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{3,+,2}<nuw><%for.i>][{-4,+,3}<nw><%for.j>][{7,+,5}<nw><%for.k>]
+;
 entry:
   %cmp32 = icmp sgt i64 %n, 0
   br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end17
diff --git a/llvm/test/Analysis/Delinearization/byte_offset.ll b/llvm/test/Analysis/Delinearization/byte_offset.ll
index 6534d3e..90b1f03 100644
--- a/llvm/test/Analysis/Delinearization/byte_offset.ll
+++ b/llvm/test/Analysis/Delinearization/byte_offset.ll
@@ -1,8 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
-; CHECK: AccessFunction: ({0,+,%i2}<%outer.loop> + %unknown)
-; CHECK-NEXT: failed to delinearize
-
 ; void foo(char A[], long i2, bool c) {
 ;   for (long i = 0; ; ++i) {
 ;     char *tmp = &A[i * i2];
@@ -13,6 +11,12 @@
 ; }
 
 define void @foo(ptr %A, i64 %i2, i64 %arg, i1 %c) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store float 0.000000e+00, ptr %arrayidx, align 4
+; CHECK-NEXT:  In Loop with Header: inner.loop
+; CHECK-NEXT:  AccessFunction: ({0,+,%i2}<%outer.loop> + %unknown)
+; CHECK-NEXT:  failed to delinearize
+;
 entry:
   br label %outer.loop
 
diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
index 3044a48..c0b1a0b 100644
--- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
+++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
@@ -1,23 +1,25 @@
-; RUN: opt -passes='print<delinearization>' -disable-output < %s 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; CHECK:      Inst:  %tmp = load float, ptr %arrayidx, align 4
-; CHECK-NEXT: In Loop with Header: for.inc
-; CHECK-NEXT: AccessFunction: {(4 * %N * %call),+,4}<%for.inc>
-; CHECK-NEXT: Base offset: %A
-; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
-; CHECK-NEXT: ArrayRef[%call][{0,+,1}<nuw><nsw><%for.inc>]
-
-; CHECK:      Inst:  %tmp5 = load float, ptr %arrayidx4, align 4
-; CHECK-NEXT: In Loop with Header: for.inc
-; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc>
-; CHECK-NEXT: Base offset: %B
-; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
-; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.inc>][%call1]
-
 ; Function Attrs: noinline nounwind uwtable
 define void @mat_mul(ptr %C, ptr %A, ptr %B, i64 %N) #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
+; CHECK-LABEL: 'mat_mul'
+; CHECK-NEXT:  Inst: %tmp = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  In Loop with Header: for.inc
+; CHECK-NEXT:  AccessFunction: {(4 * %N * %call),+,4}<%for.inc>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[%call][{0,+,1}<nuw><nsw><%for.inc>]
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %tmp5 = load float, ptr %arrayidx4, align 4
+; CHECK-NEXT:  In Loop with Header: for.inc
+; CHECK-NEXT:  AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc>
+; CHECK-NEXT:  Base offset: %B
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.inc>][%call1]
+;
 entry:
   br label %entry.split
 
diff --git a/llvm/test/Analysis/Delinearization/divide_by_one.ll b/llvm/test/Analysis/Delinearization/divide_by_one.ll
index 8c531b7..28fe5c5 100644
--- a/llvm/test/Analysis/Delinearization/divide_by_one.ll
+++ b/llvm/test/Analysis/Delinearization/divide_by_one.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-n32"
@@ -10,19 +11,22 @@ target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-n32"
 ;       dst[r * stride + c] = dst[(r + 1) * stride + c - 1];
 ; }
 
-; AddRec: {{(-1 + ((1 + %bs) * %stride)),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<nw><%for.body3>
-; CHECK: Inst:  %0 = load i8, ptr %arrayidx, align 1
-; CHECK: Base offset: %dst
-; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes.
-; CHECK: ArrayRef[{(1 + %bs),+,-1}<nw><%for.cond1.preheader>][{-1,+,1}<nw><%for.body3>]
-
-; AddRec: {{(ptr %bs),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<nw><%for.body3>
-; CHECK: Inst:  store i8 %0, ptr %arrayidx7, align 1
-; CHECK: Base offset: %dst
-; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes.
-; CHECK: ArrayRef[{%bs,+,-1}<nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.body3>]
-
 define void @test(ptr nocapture %dst, i32 %stride, i32 %bs) {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Inst: %0 = load i8, ptr %arrayidx, align 1
+; CHECK-NEXT:  In Loop with Header: for.body3
+; CHECK-NEXT:  AccessFunction: {{\{\{}}(-1 + ((1 + %bs) * %stride)),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<nw><%for.body3>
+; CHECK-NEXT:  Base offset: %dst
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%stride] with elements of 1 bytes.
+; CHECK-NEXT:  ArrayRef[{(1 + %bs),+,-1}<nw><%for.cond1.preheader>][{-1,+,1}<nw><%for.body3>]
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i8 %0, ptr %arrayidx7, align 1
+; CHECK-NEXT:  In Loop with Header: for.body3
+; CHECK-NEXT:  AccessFunction: {{\{\{}}(%stride * %bs),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<nsw><%for.body3>
+; CHECK-NEXT:  Base offset: %dst
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%stride] with elements of 1 bytes.
+; CHECK-NEXT:  ArrayRef[{%bs,+,-1}<nsw><%for.cond1.preheader>][{0,+,1}<nuw><nsw><%for.body3>]
+;
 entry:
   %cmp20 = icmp sgt i32 %bs, -1
   br i1 %cmp20, label %for.cond1.preheader.lr.ph, label %for.end9
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
new file mode 100644
index 0000000..634850b
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -0,0 +1,540 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='print<delinearization>' -disable-output -delinearize-use-fixed-size-array-heuristic 2>&1 | FileCheck %s
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][j][k] = 1;
+; }
+
+define void @a_i_j_k(ptr %a) {
+; CHECK-LABEL: 'a_i_j_k'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  Base offset: %a
+; CHECK-NEXT:  ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][7-j][k] = 1;
+; }
+
+define void @a_i_nj_k(ptr %a) {
+; CHECK-LABEL: 'a_i_nj_k'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}896,+,1024}<nuw><nsw><%for.i.header>,+,-128}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  Base offset: %a
+; CHECK-NEXT:  ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{7,+,-1}<nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j.subscript = sub i32 7, %j
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; In the following code, the access functions for both stores are represented
+; in the same way in SCEV, so the delinearization results are also the same. We
+; don't have any type information of the underlying objects.
+;
+; void f(int A[][4][64], int B[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 32; k++) {
+;       A[i][j][k] = 1;
+;       B[i][2*j][k] = 1;
+;     }
+; }
+
+define void @a_ijk_b_i2jk(ptr %a, ptr %b) {
+; CHECK-LABEL: 'a_ijk_b_i2jk'
+; CHECK-NEXT:  Inst: store i32 1, ptr %a.idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  Base offset: %a
+; CHECK-NEXT:  ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 1, ptr %b.idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  Base offset: %b
+; CHECK-NEXT:  ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %a.idx = getelementptr [4 x [64 x i32]], ptr %a, i32 %i, i32 %j, i32 %k
+  %b.idx = getelementptr [8 x [32 x i32]], ptr %b, i32 %i, i32 %j2, i32 %k
+  store i32 1, ptr %a.idx
+  store i32 1, ptr %b.idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; The type information of the underlying object is not available, so the
+; delinearization result is different from the original array size. In this
+; case, the underlying object is a type of int[][8][32], but the
+; delinearization result is like int[][4][64].
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 3; j++)
+;     for (k = 0; k < 32; k++)
+;       A[i][2*j+1][k] = 1;
+; }
+
+define void @a_i_2j1_k(ptr %a) {
+; CHECK-LABEL: 'a_i_2j1_k'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}128,+,1024}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  Base offset: %a
+; CHECK-NEXT:  ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><%for.j.header>][{32,+,1}<nw><%for.k>]
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j2 = shl i32 %j, 1
+  %j.subscript = add i32 %j2, 1
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 3
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because the step recurrence value of the i-loop is not
+; divisible by that of the j-loop.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 2; j++)
+;     for (k = 0; k < 42; k++)
+;       A[i][3*j][k] = 1;
+; }
+
+define void @a_i_3j_k(ptr %a) {
+; CHECK-LABEL: 'a_i_3j_k'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,384}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  failed to delinearize
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  %j.subscript = mul i32 %j, 3
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j.subscript, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 42
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 2
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Although the step recurrence value of j-loop is not divisible by that of the
+; k-loop, delinearization is possible because we know that the "actual" stride
+; width for the last dimension is 4 instead of 12.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 10; k++)
+;       A[i][j][3*k] = 1;
+; }
+
+define void @a_i_j_3k(ptr %a) {
+; CHECK-LABEL: 'a_i_j_3k'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,12}<nw><%for.k>
+; CHECK-NEXT:  Base offset: %a
+; CHECK-NEXT:  ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,3}<nuw><nsw><%for.k>]
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %k.subscript = mul i32 %k, 3
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j, i32 %k.subscript
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Fail to delinearize because i is used in multiple subscripts that are not adjacent.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 32; i++)
+;    for (j = 0; j < 2; j++)
+;     for (k = 0; k < 4; k++)
+;       A[i][2*j+k][i] = 1;
+; }
+
+define void @a_i_j2k_i(ptr %a) {
+; CHECK-LABEL: 'a_i_j2k_i'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1028}<%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
+; CHECK-NEXT:  failed to delinearize
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %j2 = shl i32 %j, 1
+  %j2.k = add i32 %j2, %k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %j2.k, i32 %i
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 2
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 32
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Can delinearize, but the result is different from the original array size. In
+; this case, the outermost two dimensions are melded into one.
+;
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;    for (j = 0; j < 10; j++)
+;     for (k = 0; k < 10; k++)
+;       A[i][i][j+k] = 1;
+; }
+
+define void @a_i_i_jk(ptr %a) {
+; CHECK-LABEL: 'a_i_i_jk'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,1152}<%for.i.header>,+,4}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT:  Base offset: %a
+; CHECK-NEXT:  ArrayDecl[UnknownSize][288] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{\{\{}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>]
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %jk = add i32 %j, %k
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %i, i32 %jk
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 10
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 10
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; void f(int A[][8][32]) {
+;   for (i = 0; i < 8; i++)
+;    for (j = 0; j < 4; j++)
+;     for (k = 0; k < 4; k++)
+;       for (l = 0; l < 32; l++)
+;         A[i][j+k][l] = 1;
+; }
+
+define void @a_i_jk_l(ptr %a) {
+; CHECK-LABEL: 'a_i_jk_l'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.l
+; CHECK-NEXT:  AccessFunction: {{\{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,128}<nw><%for.k.header>,+,4}<nw><%for.l>
+; CHECK-NEXT:  Base offset: %a
+; CHECK-NEXT:  ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{\{\{}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k.header>][{0,+,1}<nuw><nsw><%for.l>]
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k.header
+
+for.k.header:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k.latch ]
+  %jk = add i32 %j, %k
+  br label %for.l
+
+for.l:
+  %l = phi i32 [ 0, %for.k.header ], [ %l.inc, %for.l ]
+  %idx = getelementptr [8 x [32 x i32]], ptr %a, i32 %i, i32 %jk, i32 %l
+  store i32 1, ptr %idx
+  %l.inc = add i32 %l, 1
+  %cmp.l = icmp slt i32 %l.inc, 32
+  br i1 %cmp.l, label %for.l, label %for.k.latch
+
+for.k.latch:
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k.header, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 8
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+; Reject if the address is not a multiple of the element size.
+;
+; void f(int *A) {
+;   for (i = 0; i < 42; i++)
+;    for (j = 0; j < 8; j++)
+;     for (k = 0; k < 32; k++)
+;       *((int *)((char *)A + i*256 + j*32 + k)) = 1;
+; }
+
+define void @non_divisible_by_element_size(ptr %a) {
+; CHECK-LABEL: 'non_divisible_by_element_size'
+; CHECK-NEXT:  Inst: store i32 1, ptr %idx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,256}<nuw><nsw><%for.i.header>,+,32}<nw><%for.j.header>,+,1}<nw><%for.k>
+; CHECK-NEXT:  failed to delinearize
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx = getelementptr [8 x [32 x i8]], ptr %a, i32 %i, i32 %j, i32 %k
+  store i32 1, ptr %idx
+  %k.inc = add i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 32
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 8
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 42
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll b/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll
index 95fefed..3e45452 100644
--- a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll
+++ b/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<da>,print<delinearization>' -disable-output
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 ;
 ; a, b, c, d, g, h;
 ; char *f;
@@ -26,6 +27,147 @@
 @d = common global i32 0, align 4
 
 define i32 @fn2() {
+; CHECK-LABEL: 'fn2'
+; CHECK-NEXT:  Inst: store i32 %storemerge.i, ptr @a, align 4
+; CHECK-NEXT:  In Loop with Header: for.cond2thread-pre-split.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %9 = load i8, ptr %arrayidx.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(%1 * %2),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %10 = load i8, ptr %arrayidx.1.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(1 + (%1 * %2)),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.1.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.1.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %11 = load i8, ptr %arrayidx.2.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(2 + (%1 * %2)),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.2.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.2.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %12 = load i8, ptr %arrayidx.3.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(3 + (%1 * %2)),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.3.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.3.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %13 = load i8, ptr %arrayidx.4.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(4 + (%1 * %2)),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.4.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.4.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %14 = load i8, ptr %arrayidx.5.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(5 + (%1 * %2)),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.5.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.5.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %15 = load i8, ptr %arrayidx.6.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(6 + (%1 * %2)),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.6.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.6.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %16 = load i8, ptr %arrayidx.7.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(7 + (%1 * %2)),+,1}<nw><%for.cond2thread-pre-split.i> + %.pr.i),+,8}<nw><%for.body4.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.7.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.7.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %21 = load i8, ptr %arrayidx.ur.i, align 1
+; CHECK-NEXT:  In Loop with Header: for.body4.ur.i
+; CHECK-NEXT:  AccessFunction: (sext i32 {({(%1 * %2),+,1}<nw><%for.cond2thread-pre-split.i> + %.ph),+,1}<nw><%for.body4.ur.i> to i64)
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %conv.ur.i, ptr @c, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.ur.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i32 %inc.ur.i, ptr @b, align 4
+; CHECK-NEXT:  In Loop with Header: for.body4.ur.i
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+;
 entry:
   %.pr = load i32, ptr @d, align 4
   %phitmp = icmp eq i32 %.pr, 0
diff --git a/llvm/test/Analysis/Delinearization/himeno_1.ll b/llvm/test/Analysis/Delinearization/himeno_1.ll
index 8792844..292dca6 100644
--- a/llvm/test/Analysis/Delinearization/himeno_1.ll
+++ b/llvm/test/Analysis/Delinearization/himeno_1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 ; #define MR(mt,n,r,c,d)  mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)]
@@ -26,14 +27,17 @@
 ;           MR(a,0,i,j,k) = i + j + k;
 ; }
 
-; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes.
-; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
-
 %struct.Mat = type { ptr, i32, i32, i32, i32 }
 
 define void @jacobi(i32 %nn, ptr nocapture %a, ptr nocapture %p) nounwind uwtable {
+; CHECK-LABEL: 'jacobi'
+; CHECK-NEXT:  Inst: store float 1.000000e+00, ptr %arrayidx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))<nsw>)),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))<nsw>}<%for.j>,+,4}<%for.k>
+; CHECK-NEXT:  Base offset: %a.base
+; CHECK-NEXT:  ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
+;
 entry:
   %p.rows.ptr = getelementptr inbounds %struct.Mat, ptr %p, i64 0, i32 2
   %p.rows = load i32, ptr %p.rows.ptr
diff --git a/llvm/test/Analysis/Delinearization/himeno_2.ll b/llvm/test/Analysis/Delinearization/himeno_2.ll
index 30b2154..d210539 100644
--- a/llvm/test/Analysis/Delinearization/himeno_2.ll
+++ b/llvm/test/Analysis/Delinearization/himeno_2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 ; #define MR(mt,n,r,c,d)  mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)]
@@ -26,14 +27,17 @@
 ;           MR(a,0,i,j,k) = i + j + k;
 ; }
 
-; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes.
-; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
-
 %struct.Mat = type { ptr, i32, i32, i32, i32 }
 
 define void @jacobi(i32 %nn, ptr nocapture %a, ptr nocapture %p) nounwind uwtable {
+; CHECK-LABEL: 'jacobi'
+; CHECK-NEXT:  Inst: store float 1.000000e+00, ptr %arrayidx, align 4
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))<nsw>)),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))<nsw>}<%for.j>,+,4}<%for.k>
+; CHECK-NEXT:  Base offset: %a.base
+; CHECK-NEXT:  ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
+;
 entry:
   %p.rows.ptr = getelementptr inbounds %struct.Mat, ptr %p, i64 0, i32 2
   %p.rows = load i32, ptr %p.rows.ptr
diff --git a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
index 84ac1a39..cbe3ec8 100644
--- a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
+++ b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 ; Derived from the following code:
@@ -8,13 +9,15 @@
 ;       A[2i+b][2j] = 1.0;
 ; }
 
-; AddRec: {{((%m * %b * 8) + %A),+,(2 * %m * 8)}<%for.i>,+,(2 * 8)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
-; CHECK: ArrayRef[{%b,+,2}<nsw><%for.i>][{0,+,2}<nuw><%for.j>]
-
-
 define void @foo(i64 %n, i64 %m, i64 %b, ptr %A) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %arrayidx, align 8
+; CHECK-NEXT:  In Loop with Header: for.j
+; CHECK-NEXT:  AccessFunction: {{\{\{}}(8 * %m * %b),+,(16 * %m)}<%for.i>,+,16}<%for.j>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{%b,+,2}<nsw><%for.i>][{0,+,2}<nuw><%for.j>]
+;
 entry:
   br label %for.i
 
diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
index 4ebac31..3d21d97 100644
--- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output  2>&1 | FileCheck %s
 
 ; void foo(long n, long m, long o, double A[n][m][o]) {
@@ -8,12 +9,15 @@
 ;         A[i+3][j-4][k+7] = 1.0;
 ; }
 
-; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{3,+,1}<nuw><%for.i>][{-4,+,1}<nsw><%for.j>][{7,+,1}<nuw><nsw><%for.k>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %idx, align 8
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}(56 + (8 * (-4 + (3 * %m)) * %o)),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{3,+,1}<nuw><%for.i>][{-4,+,1}<nsw><%for.j>][{7,+,1}<nuw><nsw><%for.k>]
+;
 entry:
   br label %for.i
 
diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
index 0265fb7..3dbd71b 100644
--- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 ; void foo(long n, long m, long o, long p, double A[n][m][o+p]) {
@@ -8,12 +9,15 @@
 ;         A[i+3][j-4][k+7] = 1.0;
 ; }
 
-; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes.
-; CHECK: ArrayRef[{3,+,1}<nuw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, ptr nocapture %A) nounwind uwtable {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %arrayidx10.us.us, align 8
+; CHECK-NEXT:  In Loop with Header: for.body6.us.us
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}(56 + (8 * (-4 + (3 * %m)) * (%o + %p))),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{3,+,1}<nuw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
+;
 entry:
   %add = add nsw i64 %p, %o
   %cmp22 = icmp sgt i64 %n, 0
diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
index 51e0101..a2d64a5 100644
--- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 ; void foo(long n, long m, long o, double A[n][m][o], long p, long q, long r) {
@@ -8,12 +9,15 @@
 ;         A[i+p][j+q][k+r] = 1.0;
 ; }
 
-; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nsw><%for.j>][{%r,+,1}<nsw><%for.k>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, ptr %A, i64 %p, i64 %q, i64 %r) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %idx, align 8
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}(8 * ((((%m * %p) + %q) * %o) + %r)),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nsw><%for.j>][{%r,+,1}<nsw><%for.k>]
+;
 entry:
   br label %for.i
 
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
index b92abfc..ac83ba1 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 ; Derived from the following code:
@@ -8,21 +9,22 @@
 ;       A[i][j] = 1.0;
 ; }
 
-; Inst:  %val = load double, ptr %arrayidx
-; In Loop with Header: for.j
-; AddRec: {{0,+,(%m * 8)}<%for.i>,+,8}<%for.j>
-; Base offset: %A
-; ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
-; ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
-
-; Inst:  store double %val, ptr %arrayidx
-; In Loop with Header: for.j
-; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
-; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
-
 define void @foo(i64 %n, i64 %m, ptr %A) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: %val = load double, ptr %arrayidx, align 8
+; CHECK-NEXT:  In Loop with Header: for.j
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store double %val, ptr %arrayidx, align 8
+; CHECK-NEXT:  In Loop with Header: for.j
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
+;
 entry:
   br label %for.i
 
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
index cc4c4eb..262a092 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
@@ -1,5 +1,5 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
-; XFAIL: *
 ; We do not recognize anymore variable size arrays.
 
 ; extern void bar(long n, long m, double A[n][m]);
@@ -15,12 +15,13 @@
 ;   }
 ; }
 
-; AddRec: {{%vla.us,+,{8,+,8}<%for.cond7.preheader.lr.ph.split.us.us>}<%for.body9.lr.ph.us.us>,+,8}<%for.body9.us.us>
-; CHECK: Base offset: %vla.us
-; CHECK: ArrayDecl[UnknownSize][{1,+,1}<%for.cond7.preheader.lr.ph.split.us.us>] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.body9.lr.ph.us.us>][{0,+,1}<nuw><nsw><%for.body9.us.us>]
-
 define void @foo(i64 %a, i64 %b) nounwind uwtable {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %arrayidx10.us.us, align 8
+; CHECK-NEXT:  In Loop with Header: for.body9.us.us
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,{8,+,8}<%for.cond7.preheader.lr.ph.split.us.us>}<%for.body9.lr.ph.us.us>,+,8}<%for.body9.us.us>
+; CHECK-NEXT:  failed to delinearize
+;
 entry:
   %cmp43 = icmp sgt i64 %a, 1
   br i1 %cmp43, label %for.cond1.preheader.lr.ph, label %for.end19
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
index 252743b..b1405db 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 
 ; void foo(long n, long m, long o, double A[n][m][o]) {
@@ -8,12 +9,15 @@
 ;         A[i][j][k] = 1.0;
 ; }
 
-; AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
-
 define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %idx, align 8
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
+;
 entry:
   br label %for.i
 
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
index e8ee728..6de072e 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 ; void foo(int n, int m, int o, double A[n][m][o]) {
 ;
@@ -7,15 +8,18 @@
 ;         A[i][j][k] = 1.0;
 ; }
 
-; AddRec: {{{%A,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>,+,8}<%for.k>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes.
-; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
-
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define void @foo(i32 %n, i32 %m, i32 %o, ptr %A) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %idx, align 8
+; CHECK-NEXT:  In Loop with Header: for.k
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))<nuw><nsw>}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
+;
 entry:
   %m_zext = zext i32 %m to i64
   %n_zext = zext i32 %o to i64
diff --git a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
index ba32401..d7148c5 100644
--- a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
@@ -1,4 +1,5 @@
-; RUN: opt -aa-pipeline=basic-aa -passes='require<da>,print<delinearization>' -disable-output < %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -12,6 +13,21 @@ target triple = "x86_64-unknown-linux-gnu"
 ; }
 
 define void @foo(i64 %n, i64 %m, ptr %A) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %arrayidx, align 8
+; CHECK-NEXT:  In Loop with Header: for.j
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store double 1.000000e+00, ptr %arrayidx1, align 8
+; CHECK-NEXT:  In Loop with Header: for.j
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,8}<%for.i>,+,(8 * %n)}<%for.j>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%n] with elements of 8 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.i>]
+;
 entry:
   br label %for.i
 
diff --git a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll
index b8d4b3c..cbccafd 100644
--- a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll
+++ b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -passes='print<delinearization>' -disable-output < %s 2>&1 | FileCheck %s
 ;
 ;    void foo(float *A, long *p) {
@@ -5,13 +6,29 @@
 ;        for (long j = 0; j < 100; j++)
 ;          A[i * (*p) + j] += i + j;
 ;    }
-;
-; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes.
-; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%bb2>][{0,+,1}<nuw><nsw><%bb4>]
-;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @foo(ptr %A, ptr %p) {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: %pval = load i64, ptr %p, align 8
+; CHECK-NEXT:  In Loop with Header: bb4
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: %tmp11 = load float, ptr %tmp10, align 4
+; CHECK-NEXT:  In Loop with Header: bb4
+; CHECK-NEXT:  AccessFunction: (4 * (({0,+,1}<nuw><nsw><%bb2> * %pval)<nsw> + {0,+,1}<nuw><nsw><%bb4>)<nsw>)<nsw>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%pval] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%bb2>][{0,+,1}<nuw><nsw><%bb4>]
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store float %tmp12, ptr %tmp10, align 4
+; CHECK-NEXT:  In Loop with Header: bb4
+; CHECK-NEXT:  AccessFunction: (4 * (({0,+,1}<nuw><nsw><%bb2> * %pval)<nsw> + {0,+,1}<nuw><nsw><%bb4>)<nsw>)<nsw>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%pval] with elements of 4 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%bb2>][{0,+,1}<nuw><nsw><%bb4>]
+;
 bb:
   br label %bb2
 
diff --git a/llvm/test/Analysis/Delinearization/terms_with_identity_factor.ll b/llvm/test/Analysis/Delinearization/terms_with_identity_factor.ll
index ab5ae05..86adcc8 100644
--- a/llvm/test/Analysis/Delinearization/terms_with_identity_factor.ll
+++ b/llvm/test/Analysis/Delinearization/terms_with_identity_factor.ll
@@ -1,23 +1,27 @@
-; REQUIRES: asserts
-; RUN: opt < %s -passes='print<delinearization>' -disable-output -debug 2>&1 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 2>&1 | FileCheck %s
 ; void foo (int m, int n, char *A) {
 ;    for (int i=0; i < m; i++)
 ;      for(int j=0; j< n; j++)
 ;        A[i*n+j] += 1;
 ;}
 
-; ModuleID = 'delin.cpp'
-;target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-;target triple = "aarch64--linux-gnu"
-
-; CHECK-LABEL: Delinearization on function foo
-; CHECK: Inst:  %4 = load i8, ptr %arrayidx.us, align 1
-; CHECK: Subscripts
-; CHECK-NEXT: {0,+,1}<nuw><nsw><%for.body3.lr.ph.us>
-; CHECK-NEXT: {0,+,1}<nuw><nsw><%for.body3.us>
-; CHECK: succeeded to delinearize
-
 define void @foo(i32 %m, i32 %n, ptr nocapture %A) #0 {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT:  Inst: %4 = load i8, ptr %arrayidx.us, align 1
+; CHECK-NEXT:  In Loop with Header: for.body3.us
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,(sext i32 %n to i64)}<nsw><%for.body3.lr.ph.us>,+,1}<nsw><%for.body3.us>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][(sext i32 %n to i64)] with elements of 1 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.body3.lr.ph.us>][{0,+,1}<nuw><nsw><%for.body3.us>]
+; CHECK-EMPTY:
+; CHECK-NEXT:  Inst: store i8 %add4.us, ptr %arrayidx.us, align 1
+; CHECK-NEXT:  In Loop with Header: for.body3.us
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,(sext i32 %n to i64)}<nsw><%for.body3.lr.ph.us>,+,1}<nsw><%for.body3.us>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][(sext i32 %n to i64)] with elements of 1 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.body3.lr.ph.us>][{0,+,1}<nuw><nsw><%for.body3.us>]
+;
 entry:
   br label %entry.split
 
diff --git a/llvm/test/Analysis/Delinearization/type_mismatch.ll b/llvm/test/Analysis/Delinearization/type_mismatch.ll
index a950151..1b78f24 100644
--- a/llvm/test/Analysis/Delinearization/type_mismatch.ll
+++ b/llvm/test/Analysis/Delinearization/type_mismatch.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes='print<delinearization>' -disable-output
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 ; Test that SCEV divide code doesn't crash when attempting to create a SCEV
@@ -8,19 +9,25 @@
 
 target datalayout = "e-m:e-p:32:32-i64:64-a:0-v32:32-n16:32"
 
-define fastcc void @test(i1 %arg) {
+define fastcc void @test(i1 %arg, ptr %x) {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Inst: store i8 42, ptr %arrayidx.phi, align 1
+; CHECK-NEXT:  In Loop with Header: for.body11
+; CHECK-NEXT:  AccessFunction: 0
+; CHECK-NEXT:  failed to delinearize
+;
 entry:
-  %0 = load i16, ptr undef, align 2
+  %0 = load i16, ptr %x, align 2
   %conv21 = zext i16 %0 to i32
   br label %for.cond7.preheader
 
 for.cond7.preheader:
-  %p1.022 = phi ptr [ undef, %entry ], [ %add.ptr, %for.end ]
+  %p1.022 = phi ptr [ %x, %entry ], [ %add.ptr, %for.end ]
   br label %for.body11
 
 for.body11:
-  %arrayidx.phi = phi ptr [ %p1.022, %for.cond7.preheader ], [ undef, %for.body11 ]
-  store i8 undef, ptr %arrayidx.phi, align 1
+  %arrayidx.phi = phi ptr [ %p1.022, %for.cond7.preheader ], [ %x, %for.body11 ]
+  store i8 42, ptr %arrayidx.phi, align 1
   br i1 %arg, label %for.body11, label %for.end
 
 for.end:
diff --git a/llvm/test/Analysis/Delinearization/undef.ll b/llvm/test/Analysis/Delinearization/undef.ll
deleted file mode 100644
index 1e62c45..0000000
--- a/llvm/test/Analysis/Delinearization/undef.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt < %s -passes='print<delinearization>' -disable-output
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @foo(ptr %Ey, i1 %arg) {
-entry:
-  br i1 %arg, label %for.cond55.preheader, label %for.end324
-
-for.cond55.preheader:
-  %iz.069 = phi i64 [ %inc323, %for.inc322 ], [ 0, %entry ]
-  br i1 %arg, label %for.cond58.preheader, label %for.inc322
-
-for.cond58.preheader:
-  %iy.067 = phi i64 [ %inc320, %for.end ], [ 0, %for.cond55.preheader ]
-  br i1 %arg, label %for.body60, label %for.end
-
-for.body60:
-  %ix.062 = phi i64 [ %inc, %for.body60 ], [ 0, %for.cond58.preheader ]
-  %0 = mul i64 %iz.069, undef
-  %tmp5 = add i64 %iy.067, %0
-  %tmp6 = mul i64 %tmp5, undef
-  %arrayidx69.sum = add i64 undef, %tmp6
-  %arrayidx70 = getelementptr inbounds double, ptr %Ey, i64 %arrayidx69.sum
-  %1 = load double, ptr %arrayidx70, align 8
-  %inc = add nsw i64 %ix.062, 1
-  br i1 false, label %for.body60, label %for.end
-
-for.end:
-  %inc320 = add nsw i64 %iy.067, 1
-  br i1 %arg, label %for.cond58.preheader, label %for.inc322
-
-for.inc322:
-  %inc323 = add nsw i64 %iz.069, 1
-  br i1 %arg, label %for.cond55.preheader, label %for.end324
-
-for.end324:
-  ret void
-}
diff --git a/llvm/test/Analysis/DemandedBits/ashr.ll b/llvm/test/Analysis/DemandedBits/ashr.ll
new file mode 100644
index 0000000..6185d4c
--- /dev/null
+++ b/llvm/test/Analysis/DemandedBits/ashr.ll
@@ -0,0 +1,198 @@
+; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
+
+define i8 @test_ashr_const_amount_4(i32 %a) {
+; CHECK-LABEL: 'test_ashr_const_amount_4'
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr = ashr i32 %a, 4
+; CHECK-DAG:  DemandedBits: 0xff0 for %a in   %ashr = ashr i32 %a, 4
+; CHECK-DAG:  DemandedBits: 0xffffffff for 4 in   %ashr = ashr i32 %a, 4
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xff for %ashr in   %ashr.t = trunc i32 %ashr to i8
+;
+  %ashr = ashr i32 %a, 4
+  %ashr.t = trunc i32 %ashr to i8
+  ret i8 %ashr.t
+}
+
+define i8 @test_ashr_const_amount_5(i32 %a) {
+; CHECK-LABEL: 'test_ashr_const_amount_5'
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr = ashr i32 %a, 5
+; CHECK-DAG:  DemandedBits: 0x1fe0 for %a in   %ashr = ashr i32 %a, 5
+; CHECK-DAG:  DemandedBits: 0xffffffff for 5 in   %ashr = ashr i32 %a, 5
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xff for %ashr in   %ashr.t = trunc i32 %ashr to i8
+;
+  %ashr = ashr i32 %a, 5
+  %ashr.t = trunc i32 %ashr to i8
+  ret i8 %ashr.t
+}
+
+define i8 @test_ashr_const_amount_8(i32 %a) {
+; CHECK-LABEL: 'test_ashr_const_amount_8'
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr = ashr i32 %a, 8
+; CHECK-DAG:  DemandedBits: 0xff00 for %a in   %ashr = ashr i32 %a, 8
+; CHECK-DAG:  DemandedBits: 0xffffffff for 8 in   %ashr = ashr i32 %a, 8
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xff for %ashr in   %ashr.t = trunc i32 %ashr to i8
+;
+  %ashr = ashr i32 %a, 8
+  %ashr.t = trunc i32 %ashr to i8
+  ret i8 %ashr.t
+}
+
+define i8 @test_ashr_const_amount_9(i32 %a) {
+
+; CHECK-LABEL: 'test_ashr_const_amount_9'
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xff for %ashr in   %ashr.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr = ashr i32 %a, 8
+; CHECK-DAG:  DemandedBits: 0xff00 for %a in   %ashr = ashr i32 %a, 8
+; CHECK-DAG:  DemandedBits: 0xffffffff for 8 in   %ashr = ashr i32 %a, 8
+;
+  %ashr = ashr i32 %a, 8
+  %ashr.t = trunc i32 %ashr to i8
+  ret i8 %ashr.t
+}
+
+define i8 @test_ashr(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_ashr'
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xff for %ashr in   %ashr.t = trunc i32 %ashr to i8
+;
+  %ashr = ashr i32 %a, %b
+  %ashr.t = trunc i32 %ashr to i8
+  ret i8 %ashr.t
+}
+
+define i8 @test_ashr_range_1(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_ashr_range_1'
+; CHECK-DAG:  DemandedBits: 0xff for   %shl.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xff for %ashr in   %shl.t = trunc i32 %ashr to i8
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0x3 for %b in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for 3 in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xff for   %ashr = ashr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0x7ff for %a in   %ashr = ashr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b2 in   %ashr = ashr i32 %a, %b2
+;
+  %b2 = and i32 %b, 3
+  %ashr = ashr i32 %a, %b2
+  %shl.t = trunc i32 %ashr to i8
+  ret i8 %shl.t
+}
+
+define i32 @test_ashr_range_2(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_ashr_range_2'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0x3 for %b in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for 3 in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %ashr = ashr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %ashr = ashr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b2 in   %ashr = ashr i32 %a, %b2
+;
+  %b2 = and i32 %b, 3
+  %ashr = ashr i32 %a, %b2
+  ret i32 %ashr
+}
+
+define i32 @test_ashr_range_3(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_ashr_range_3'
+; CHECK-DAG:  DemandedBits:  0xffff for   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits:  0xffffffff for %a in   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits:  0xffffffff for %b in   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits:  0xffffffff for   %shl = shl i32 %ashr, 16
+; CHECK-DAG:  DemandedBits:  0xffff for %ashr in   %shl = shl i32 %ashr, 16
+; CHECK-DAG:  DemandedBits:  0xffffffff for 16 in   %shl = shl i32 %ashr, 16
+;
+  %ashr = ashr i32 %a, %b
+  %shl = shl i32 %ashr, 16
+  ret i32 %shl
+}
+define i32 @test_ashr_range_4(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_ashr_range_4'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %shr = lshr i32 %ashr, 8
+; CHECK-DAG:  DemandedBits: 0xffffff00 for %ashr in   %shr = lshr i32 %ashr, 8
+; CHECK-DAG:  DemandedBits: 0xffffffff for 8 in   %shr = lshr i32 %ashr, 8
+; CHECK-DAG:  DemandedBits: 0xffffff00 for   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffff00 for %a in   %ashr = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %ashr = ashr i32 %a, %b
+  %ashr = ashr i32 %a, %b
+  %shr = lshr i32 %ashr, 8
+  ret i32 %shr
+}
+
+define i32 @test_ashr_range_5(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_ashr_range_5'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %2 = and i32 %1, 255
+; CHECK-DAG:  DemandedBits: 0xff for %1 in   %2 = and i32 %1, 255
+; CHECK-DAG:  DemandedBits: 0xffffffff for 255 in   %2 = and i32 %1, 255
+; CHECK-DAG:  DemandedBits: 0xff for   %1 = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %1 = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %1 = ashr i32 %a, %b
+;
+  %1 = ashr i32 %a, %b
+  %2 = and i32 %1, 255
+  ret i32 %2
+}
+
+define i32 @test_ashr_range_6(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_ashr_range_6'
+; CHECK-DAG:  DemandedBits: 0xffff0000 for   %lshr.1 = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffff0000 for %a in   %lshr.1 = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %lshr.1 = ashr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %lshr.2 = ashr i32 %lshr.1, 16
+; CHECK-DAG:  DemandedBits: 0xffff0000 for %lshr.1 in   %lshr.2 = ashr i32 %lshr.1, 16
+; CHECK-DAG:  DemandedBits: 0xffffffff for 16 in   %lshr.2 = ashr i32 %lshr.1, 16
+;
+  %lshr.1 = ashr i32 %a, %b
+  %lshr.2 = ashr i32 %lshr.1, 16
+  ret i32 %lshr.2
+}
+
+define i8 @test_ashr_var_amount(i32 %a, i32 %b){
+; CHECK-LABEL: 'test_ashr_var_amount'
+; CHECK-DAG: DemandedBits: 0xff for   %4 = ashr i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xffffffff for %1 in   %4 = ashr i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xffffffff for %3 in   %4 = ashr i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xff for   %2 = trunc i32 %1 to i8
+; CHECK-DAG: DemandedBits: 0xff for %1 in   %2 = trunc i32 %1 to i8
+; CHECK-DAG: DemandedBits: 0xffffffff for   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for   %3 = zext i8 %2 to i32
+; CHECK-DAG: DemandedBits: 0xff for %2 in   %3 = zext i8 %2 to i32
+; CHECK-DAG: DemandedBits: 0xff for   %5 = trunc i32 %4 to i8
+; CHECK-DAG: DemandedBits: 0xff for %4 in   %5 = trunc i32 %4 to i8
+;
+  %1 = add nsw i32 %a, %b
+  %2 = trunc i32 %1 to i8
+  %3 = zext i8 %2 to i32
+  %4 = ashr i32 %1, %3
+  %5 = trunc i32 %4 to i8
+  ret i8 %5
+}
+
+define i8 @test_ashr_var_amount_nsw(i32 %a, i32 %b){
+ ; CHECK-LABEL 'test_ashr_var_amount_nsw'
+ ; CHECK-DAG: DemandedBits: 0xff for   %5 = trunc i32 %4 to i8
+ ; CHECK-DAG: DemandedBits: 0xff for %4 in   %5 = trunc i32 %4 to i8
+ ; CHECK-DAG: DemandedBits: 0xffffffff for   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xff for   %2 = trunc i32 %1 to i8
+ ; CHECK-DAG: DemandedBits: 0xff for %1 in   %2 = trunc i32 %1 to i8
+ ; CHECK-DAG: DemandedBits: 0xffffffff for   %3 = zext i8 %2 to i32
+ ; CHECK-DAG: DemandedBits: 0xff for %2 in   %3 = zext i8 %2 to i32
+ ; CHECK-DAG: DemandedBits: 0xff for   %4 = ashr exact i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %1 in   %4 = ashr exact i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %3 in   %4 = ashr exact i32 %1, %3
+ ;
+  %1 = add nsw i32 %a, %b
+  %2 = trunc i32 %1 to i8
+  %3 = zext i8 %2 to i32
+  %4 = ashr exact i32 %1, %3
+  %5 = trunc i32 %4 to i8
+  ret i8 %5
+}
diff --git a/llvm/test/Analysis/DemandedBits/lshr.ll b/llvm/test/Analysis/DemandedBits/lshr.ll
new file mode 100644
index 0000000..e07f994
--- /dev/null
+++ b/llvm/test/Analysis/DemandedBits/lshr.ll
@@ -0,0 +1,198 @@
+; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
+
+define i8 @test_lshr_const_amount_4(i32 %a) {
+; CHECK-LABEL: 'test_lshr_const_amount_4'
+; CHECK-DAG: DemandedBits: 0xff for   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for %lshr in   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for   %lshr = lshr i32 %a, 4
+; CHECK-DAG: DemandedBits: 0xff0 for %a in   %lshr = lshr i32 %a, 4
+; CHECK-DAG: DemandedBits: 0xffffffff for 4 in   %lshr = lshr i32 %a, 4
+;
+  %lshr = lshr i32 %a, 4
+  %lshr.t = trunc i32 %lshr to i8
+  ret i8 %lshr.t
+}
+
+define i8 @test_lshr_const_amount_5(i32 %a) {
+; CHECK-LABEL: 'test_lshr_const_amount_5'
+; CHECK-DAG: DemandedBits: 0xff for   %lshr = lshr i32 %a, 5
+; CHECK-DAG: DemandedBits: 0x1fe0 for %a in   %lshr = lshr i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xffffffff for 5 in   %lshr = lshr i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xff for   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for %lshr in   %lshr.t = trunc i32 %lshr to i8
+;
+  %lshr = lshr i32 %a, 5
+  %lshr.t = trunc i32 %lshr to i8
+  ret i8 %lshr.t
+}
+define i8 @test_lshr_const_amount_8(i32 %a) {
+; CHECK-LABEL: 'test_lshr_const_amount_8'
+; CHECK-DAG: DemandedBits: 0xff for   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for %lshr in   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for   %lshr = lshr i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xff00 for %a in   %lshr = lshr i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %lshr = lshr i32 %a, 8
+;
+  %lshr = lshr i32 %a, 8
+  %lshr.t = trunc i32 %lshr to i8
+  ret i8 %lshr.t
+}
+
+define i8 @test_lshr_const_amount_9(i32 %a) {
+; CHECK-LABEL: 'test_lshr_const_amount_9'
+; CHECK-DAG: DemandedBits: 0xff for   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for %lshr in   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for   %lshr = lshr i32 %a, 9
+; CHECK-DAG: DemandedBits: 0x1fe00 for %a in   %lshr = lshr i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xffffffff for 9 in   %lshr = lshr i32 %a, 9
+;
+  %lshr = lshr i32 %a, 9
+  %lshr.t = trunc i32 %lshr to i8
+  ret i8 %lshr.t
+}
+
+define i8 @test_lshr(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_lshr'
+; CHECK-DAG: DemandedBits: 0xff for   %lshr = lshr i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %lshr = lshr i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %lshr = lshr i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for   %lshr.t = trunc i32 %lshr to i8
+; CHECK-DAG: DemandedBits: 0xff for %lshr in   %lshr.t = trunc i32 %lshr to i8
+;
+  %lshr = lshr i32 %a, %b
+  %lshr.t = trunc i32 %lshr to i8
+  ret i8 %lshr.t
+}
+
+define i8 @test_lshr_range_1(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_lshr_range_1'
+; CHECK-DAG:  DemandedBits: 0xff for   %shl.t = trunc i32 %lshr to i8
+; CHECK-DAG:  DemandedBits: 0xff for %lshr in   %shl.t = trunc i32 %lshr to i8
+; CHECK-DAG:  DemandedBits: 0xff for   %lshr = lshr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0x7ff for %a in   %lshr = lshr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b2 in   %lshr = lshr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0x3 for %b in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for 3 in   %b2 = and i32 %b, 3
+;
+  %b2 = and i32 %b, 3
+  %lshr = lshr i32 %a, %b2
+  %shl.t = trunc i32 %lshr to i8
+  ret i8 %shl.t
+}
+
+define i32 @test_lshr_range_2(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_lshr_range_2'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %lshr = lshr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %lshr = lshr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b2 in   %lshr = lshr i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0x3 for %b in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for 3 in   %b2 = and i32 %b, 3
+;
+  %b2 = and i32 %b, 3
+  %lshr = lshr i32 %a, %b2
+  ret i32 %lshr
+}
+
+define i32 @test_lshr_range_3(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_lshr_range_3'
+; CHECK-DAG:  DemandedBits: 0xffff for   %lshr = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %lshr = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %lshr = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %shl = shl i32 %lshr, 16
+; CHECK-DAG:  DemandedBits: 0xffff for %lshr in   %shl = shl i32 %lshr, 16
+; CHECK-DAG:  DemandedBits: 0xffffffff for 16 in   %shl = shl i32 %lshr, 16
+;
+  %lshr = lshr i32 %a, %b
+  %shl = shl i32 %lshr, 16
+  ret i32 %shl
+}
+
+define i32 @test_lshr_range_4(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_lshr_range_4'
+; CHECK-DAG:  DemandedBits: 0xffffff00 for   %lshr = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffff00 for %a in   %lshr = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %lshr = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %shr = ashr i32 %lshr, 8
+; CHECK-DAG:  DemandedBits: 0xffffff00 for %lshr in   %shr = ashr i32 %lshr, 8
+; CHECK-DAG:  DemandedBits: 0xffffffff for 8 in   %shr = ashr i32 %lshr, 8
+  %lshr = lshr i32 %a, %b
+  %shr = ashr i32 %lshr, 8
+  ret i32 %shr
+}
+
+define i32 @test_lshr_range_5(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_lshr_range_5'
+; CHECK-DAG:  DemandedBits: 0xff for   %1 = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %1 = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %1 = lshr i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %2 = and i32 %1, 255
+; CHECK-DAG:  DemandedBits: 0xff for %1 in   %2 = and i32 %1, 255
+; CHECK-DAG:  DemandedBits: 0xffffffff for 255 in   %2 = and i32 %1, 255
+;
+  %1 = lshr i32 %a, %b
+  %2 = and i32 %1, 255
+  ret i32 %2
+}
+
+define i32 @test_lshr_range_6(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_lshr_range_6'
+; CHECK-DAG: DemandedBits: 0xffff0000 for   %lshr.1 = lshr i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffff0000 for %a in   %lshr.1 = lshr i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %lshr.1 = lshr i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for   %lshr.2 = lshr i32 %lshr.1, 16
+; CHECK-DAG: DemandedBits: 0xffff0000 for %lshr.1 in   %lshr.2 = lshr i32 %lshr.1, 16
+; CHECK-DAG: DemandedBits: 0xffffffff for 16 in   %lshr.2 = lshr i32 %lshr.1, 16
+;
+  %lshr.1 = lshr i32 %a, %b
+  %lshr.2 = lshr i32 %lshr.1, 16
+  ret i32 %lshr.2
+}
+
+
+define i8 @test_lshr_var_amount(i32 %a, i32 %b){
+; CHECK-LABEL: 'test_lshr_var_amount'
+; CHECK-DAG: DemandedBits: 0xff for   %4 = lshr i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xffffffff for %1 in   %4 = lshr i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xffffffff for %3 in   %4 = lshr i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xff for   %5 = trunc i32 %4 to i8
+; CHECK-DAG: DemandedBits: 0xff for %4 in   %5 = trunc i32 %4 to i8
+; CHECK-DAG: DemandedBits: 0xffffffff for   %3 = zext i8 %2 to i32
+; CHECK-DAG: DemandedBits: 0xff for %2 in   %3 = zext i8 %2 to i32
+; CHECK-DAG: DemandedBits: 0xffffffff for   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for   %2 = trunc i32 %1 to i8
+; CHECK-DAG: DemandedBits: 0xff for %1 in   %2 = trunc i32 %1 to i8
+;
+  %1 = add nsw i32 %a, %b
+  %2 = trunc i32 %1 to i8
+  %3 = zext i8 %2 to i32
+  %4 = lshr i32 %1, %3
+  %5 = trunc i32 %4 to i8
+  ret i8 %5
+}
+
+define i8 @test_lshr_var_amount_exact(i32 %a, i32 %b){
+ ; CHECK-LABEL 'test_lshr_var_amount_nsw'
+ ; CHECK-DAG: DemandedBits: 0xffffffff for   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xff for   %2 = trunc i32 %1 to i8
+ ; CHECK-DAG: DemandedBits: 0xff for %1 in   %2 = trunc i32 %1 to i8
+ ; CHECK-DAG: DemandedBits: 0xffffffff for   %3 = zext i8 %2 to i32
+ ; CHECK-DAG: DemandedBits: 0xff for %2 in   %3 = zext i8 %2 to i32
+ ; CHECK-DAG: DemandedBits: 0xff for   %4 = lshr exact i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %1 in   %4 = lshr exact i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %3 in   %4 = lshr exact i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xff for   %5 = trunc i32 %4 to i8
+ ; CHECK-DAG: DemandedBits: 0xff for %4 in   %5 = trunc i32 %4 to i8
+ ;
+  %1 = add nsw i32 %a, %b
+  %2 = trunc i32 %1 to i8
+  %3 = zext i8 %2 to i32
+  %4 = lshr exact i32 %1, %3
+  %5 = trunc i32 %4 to i8
+  ret i8 %5
+}
diff --git a/llvm/test/Analysis/DemandedBits/shl.ll b/llvm/test/Analysis/DemandedBits/shl.ll
index e41f5f4..c872d2d 100644
--- a/llvm/test/Analysis/DemandedBits/shl.ll
+++ b/llvm/test/Analysis/DemandedBits/shl.ll
@@ -57,10 +57,142 @@ define i8 @test_shl(i32 %a, i32 %b) {
 ; CHECK-DAG:  DemandedBits: 0xff for %shl.t = trunc i32 %shl to i8
 ; CHECK-DAG:  DemandedBits: 0xff for %shl in %shl.t = trunc i32 %shl to i8
 ; CHECK-DAG:  DemandedBits: 0xff for %shl = shl i32 %a, %b
-; CHECK-DAG:  DemandedBits: 0xffffffff for %a in %shl = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xff for %a in %shl = shl i32 %a, %b
 ; CHECK-DAG:  DemandedBits: 0xffffffff for %b in %shl = shl i32 %a, %b
 ;
   %shl = shl i32 %a, %b
   %shl.t = trunc i32 %shl to i8
   ret i8 %shl.t
 }
+
+define i8 @test_shl_range_1(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_shl_range_1'
+; CHECK-DAG:  DemandedBits: 0xff for   %shl = shl i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xff for %a in   %shl = shl i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b2 in   %shl = shl i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xff for   %shl.t = trunc i32 %shl to i8
+; CHECK-DAG:  DemandedBits: 0xff for %shl in   %shl.t = trunc i32 %shl to i8
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0x3 for %b in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for 3 in   %b2 = and i32 %b, 3
+;
+  %b2 = and i32 %b, 3
+  %shl = shl i32 %a, %b2
+  %shl.t = trunc i32 %shl to i8
+  ret i8 %shl.t
+}
+
+define i32 @test_shl_range_2(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_shl_range_2'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0x3 for %b in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for 3 in   %b2 = and i32 %b, 3
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %shl = shl i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %shl = shl i32 %a, %b2
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b2 in   %shl = shl i32 %a, %b2
+;
+  %b2 = and i32 %b, 3
+  %shl = shl i32 %a, %b2
+  ret i32 %shl
+}
+
+define i32 @test_shl_range_3(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_shl_range_3'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %shr = lshr i32 %shl, 16
+; CHECK-DAG:  DemandedBits: 0xffff0000 for %shl in   %shr = lshr i32 %shl, 16
+; CHECK-DAG:  DemandedBits: 0xffffffff for 16 in   %shr = lshr i32 %shl, 16
+; CHECK-DAG:  DemandedBits: 0xffff0000 for   %shl = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %shl = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %shl = shl i32 %a, %b
+;
+  %shl = shl i32 %a, %b
+  %shr = lshr i32 %shl, 16
+  ret i32 %shr
+}
+
+define i32 @test_shl_range_4(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_shl_range_4'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %shr = ashr i32 %shl, 8
+; CHECK-DAG:  DemandedBits: 0xffffff00 for %shl in   %shr = ashr i32 %shl, 8
+; CHECK-DAG:  DemandedBits: 0xffffffff for 8 in   %shr = ashr i32 %shl, 8
+; CHECK-DAG:  DemandedBits: 0xffffff00 for   %shl = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %a in   %shl = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %shl = shl i32 %a, %b
+  %shl = shl i32 %a, %b
+  %shr = ashr i32 %shl, 8
+  ret i32 %shr
+}
+
+define i32 @test_shl_range_5(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_shl_range_5'
+; CHECK-DAG:  DemandedBits: 0xff for   %1 = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xff for %a in   %1 = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %1 = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %2 = and i32 %1, 255
+; CHECK-DAG:  DemandedBits: 0xff for %1 in   %2 = and i32 %1, 255
+; CHECK-DAG:  DemandedBits: 0xffffffff for 255 in   %2 = and i32 %1, 255
+;
+  %1 = shl i32 %a, %b
+  %2 = and i32 %1, 255
+  ret i32 %2
+}
+
+define i32 @test_shl_range_6(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_shl_range_6'
+; CHECK-DAG:  DemandedBits: 0xffffffff for   %shl.2 = shl i32 %shl.1, 16
+; CHECK-DAG:  DemandedBits: 0xffff for %shl.1 in   %shl.2 = shl i32 %shl.1, 16
+; CHECK-DAG:  DemandedBits: 0xffffffff for 16 in   %shl.2 = shl i32 %shl.1, 16
+; CHECK-DAG:  DemandedBits: 0xffff for   %shl.1 = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffff for %a in   %shl.1 = shl i32 %a, %b
+; CHECK-DAG:  DemandedBits: 0xffffffff for %b in   %shl.1 = shl i32 %a, %b
+;
+  %shl.1 = shl i32 %a, %b
+  %shl.2 = shl i32 %shl.1, 16
+  ret i32 %shl.2
+}
+
+define i8 @test_shl_var_amount(i32 %a, i32 %b){
+; CHECK-LABEL: 'test_shl_var_amount'
+; CHECK-DAG: DemandedBits: 0xff for   %5 = trunc i32 %4 to i8
+; CHECK-DAG: DemandedBits: 0xff for %4 in   %5 = trunc i32 %4 to i8
+; CHECK-DAG: DemandedBits: 0xff for   %4 = shl i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xff for %1 in   %4 = shl i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xffffffff for %3 in   %4 = shl i32 %1, %3
+; CHECK-DAG: DemandedBits: 0xff for   %2 = trunc i32 %1 to i8
+; CHECK-DAG: DemandedBits: 0xff for %1 in   %2 = trunc i32 %1 to i8
+; CHECK-DAG: DemandedBits: 0xffffffff for   %3 = zext i8 %2 to i32
+; CHECK-DAG: DemandedBits: 0xff for %2 in   %3 = zext i8 %2 to i32
+; CHECK-DAG: DemandedBits: 0xff for   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for %a in   %1 = add nsw i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for %b in   %1 = add nsw i32 %a, %b
+;
+  %1 = add nsw i32 %a, %b
+  %2 = trunc i32 %1 to i8
+  %3 = zext i8 %2 to i32
+  %4 = shl i32 %1, %3
+  %5 = trunc i32 %4 to i8
+  ret i8 %5
+}
+
+define i8 @test_shl_var_amount_nsw(i32 %a, i32 %b){
+ ; CHECK-LABEL 'test_shl_var_amount_nsw'
+ ; CHECK-DAG: DemandedBits: 0xff for   %5 = trunc i32 %4 to i8
+ ; CHECK-DAG: DemandedBits: 0xff for %4 in   %5 = trunc i32 %4 to i8
+ ; CHECK-DAG: DemandedBits: 0xff for   %4 = shl nsw i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %1 in   %4 = shl nsw i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %3 in   %4 = shl nsw i32 %1, %3
+ ; CHECK-DAG: DemandedBits: 0xffffffff for   %3 = zext i8 %2 to i32
+ ; CHECK-DAG: DemandedBits: 0xff for %2 in   %3 = zext i8 %2 to i32
+ ; CHECK-DAG: DemandedBits: 0xff for   %2 = trunc i32 %1 to i8
+ ; CHECK-DAG: DemandedBits: 0xff for %1 in   %2 = trunc i32 %1 to i8
+ ; CHECK-DAG: DemandedBits: 0xffffffff for   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %1 = add nsw i32 %a, %b
+ ; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %1 = add nsw i32 %a, %b
+ ;
+  %1 = add nsw i32 %a, %b
+  %2 = trunc i32 %1 to i8
+  %3 = zext i8 %2 to i32
+  %4 = shl nsw i32 %1, %3
+  %5 = trunc i32 %4 to i8
+  ret i8 %5
+}
diff --git a/llvm/test/Analysis/IR2Vec/Inputs/dummy_2D_vocab.json b/llvm/test/Analysis/IR2Vec/Inputs/dummy_2D_vocab.json
index 9b38f2e..07fde84 100644
--- a/llvm/test/Analysis/IR2Vec/Inputs/dummy_2D_vocab.json
+++ b/llvm/test/Analysis/IR2Vec/Inputs/dummy_2D_vocab.json
@@ -47,6 +47,7 @@
         "FPTrunc": [89, 90],
         "FPExt": [91, 92],
         "PtrToInt": [93, 94],
+        "PtrToAddr": [135, 136],
         "IntToPtr": [95, 96],
         "BitCast": [97, 98],
         "AddrSpaceCast": [99, 100],
diff --git a/llvm/test/Analysis/IR2Vec/Inputs/dummy_3D_nonzero_type_vocab.json b/llvm/test/Analysis/IR2Vec/Inputs/dummy_3D_nonzero_type_vocab.json
index bb97a49..fcc1344 100644
--- a/llvm/test/Analysis/IR2Vec/Inputs/dummy_3D_nonzero_type_vocab.json
+++ b/llvm/test/Analysis/IR2Vec/Inputs/dummy_3D_nonzero_type_vocab.json
@@ -47,6 +47,7 @@
         "FPTrunc": [0, 0, 0],
         "FPExt": [0, 0, 0],
         "PtrToInt": [0, 0, 0],
+        "PtrToAddr": [0, 0, 0],
         "IntToPtr": [0, 0, 0],
         "BitCast": [0, 0, 0],
         "AddrSpaceCast": [0, 0, 0],
diff --git a/llvm/test/Analysis/IR2Vec/Inputs/reference_default_vocab_print.txt b/llvm/test/Analysis/IR2Vec/Inputs/reference_default_vocab_print.txt
index 79fcf82..df7769c 100644
--- a/llvm/test/Analysis/IR2Vec/Inputs/reference_default_vocab_print.txt
+++ b/llvm/test/Analysis/IR2Vec/Inputs/reference_default_vocab_print.txt
@@ -45,6 +45,7 @@ Key: SIToFP:  [ 87.00  88.00 ]
 Key: FPTrunc:  [ 89.00  90.00 ]
 Key: FPExt:  [ 91.00  92.00 ]
 Key: PtrToInt:  [ 93.00  94.00 ]
+Key: PtrToAddr:  [ 135.00  136.00 ]
 Key: IntToPtr:  [ 95.00  96.00 ]
 Key: BitCast:  [ 97.00  98.00 ]
 Key: AddrSpaceCast:  [ 99.00  100.00 ]
@@ -66,25 +67,16 @@ Key: InsertValue:  [ 129.00  130.00 ]
 Key: LandingPad:  [ 131.00  132.00 ]
 Key: Freeze:  [ 133.00  134.00 ]
 Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
 Key: VoidTy:  [ 1.50  2.00 ]
 Key: LabelTy:  [ 2.50  3.00 ]
 Key: MetadataTy:  [ 3.50  4.00 ]
-Key: UnknownTy:  [ 4.50  5.00 ]
+Key: VectorTy:  [ 11.50  12.00 ]
 Key: TokenTy:  [ 5.50  6.00 ]
 Key: IntegerTy:  [ 6.50  7.00 ]
 Key: FunctionTy:  [ 7.50  8.00 ]
 Key: PointerTy:  [ 8.50  9.00 ]
 Key: StructTy:  [ 9.50  10.00 ]
 Key: ArrayTy:  [ 10.50  11.00 ]
-Key: VectorTy:  [ 11.50  12.00 ]
-Key: VectorTy:  [ 11.50  12.00 ]
-Key: PointerTy:  [ 8.50  9.00 ]
 Key: UnknownTy:  [ 4.50  5.00 ]
 Key: Function:  [ 0.20  0.40 ]
 Key: Pointer:  [ 0.60  0.80 ]
diff --git a/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd1_vocab_print.txt b/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd1_vocab_print.txt
index 584bd31..f3ce809 100644
--- a/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd1_vocab_print.txt
+++ b/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd1_vocab_print.txt
@@ -45,6 +45,7 @@ Key: SIToFP:  [ 43.50  44.00 ]
 Key: FPTrunc:  [ 44.50  45.00 ]
 Key: FPExt:  [ 45.50  46.00 ]
 Key: PtrToInt:  [ 46.50  47.00 ]
+Key: PtrToAddr:  [ 67.50  68.00 ]
 Key: IntToPtr:  [ 47.50  48.00 ]
 Key: BitCast:  [ 48.50  49.00 ]
 Key: AddrSpaceCast:  [ 49.50  50.00 ]
@@ -66,25 +67,16 @@ Key: InsertValue:  [ 64.50  65.00 ]
 Key: LandingPad:  [ 65.50  66.00 ]
 Key: Freeze:  [ 66.50  67.00 ]
 Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
-Key: FloatTy:  [ 0.50  1.00 ]
 Key: VoidTy:  [ 1.50  2.00 ]
 Key: LabelTy:  [ 2.50  3.00 ]
 Key: MetadataTy:  [ 3.50  4.00 ]
-Key: UnknownTy:  [ 4.50  5.00 ]
+Key: VectorTy:  [ 11.50  12.00 ]
 Key: TokenTy:  [ 5.50  6.00 ]
 Key: IntegerTy:  [ 6.50  7.00 ]
 Key: FunctionTy:  [ 7.50  8.00 ]
 Key: PointerTy:  [ 8.50  9.00 ]
 Key: StructTy:  [ 9.50  10.00 ]
 Key: ArrayTy:  [ 10.50  11.00 ]
-Key: VectorTy:  [ 11.50  12.00 ]
-Key: VectorTy:  [ 11.50  12.00 ]
-Key: PointerTy:  [ 8.50  9.00 ]
 Key: UnknownTy:  [ 4.50  5.00 ]
 Key: Function:  [ 0.50  1.00 ]
 Key: Pointer:  [ 1.50  2.00 ]
diff --git a/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd2_vocab_print.txt b/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd2_vocab_print.txt
index 2727c85..72b25b9 100644
--- a/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd2_vocab_print.txt
+++ b/llvm/test/Analysis/IR2Vec/Inputs/reference_wtd2_vocab_print.txt
@@ -45,6 +45,7 @@ Key: SIToFP:  [ 8.70  8.80 ]
 Key: FPTrunc:  [ 8.90  9.00 ]
 Key: FPExt:  [ 9.10  9.20 ]
 Key: PtrToInt:  [ 9.30  9.40 ]
+Key: PtrToAddr:  [ 13.50  13.60 ]
 Key: IntToPtr:  [ 9.50  9.60 ]
 Key: BitCast:  [ 9.70  9.80 ]
 Key: AddrSpaceCast:  [ 9.90  10.00 ]
@@ -66,25 +67,16 @@ Key: InsertValue:  [ 12.90  13.00 ]
 Key: LandingPad:  [ 13.10  13.20 ]
 Key: Freeze:  [ 13.30  13.40 ]
 Key: FloatTy:  [ 0.00  0.00 ]
-Key: FloatTy:  [ 0.00  0.00 ]
-Key: FloatTy:  [ 0.00  0.00 ]
-Key: FloatTy:  [ 0.00  0.00 ]
-Key: FloatTy:  [ 0.00  0.00 ]
-Key: FloatTy:  [ 0.00  0.00 ]
-Key: FloatTy:  [ 0.00  0.00 ]
 Key: VoidTy:  [ 0.00  0.00 ]
 Key: LabelTy:  [ 0.00  0.00 ]
 Key: MetadataTy:  [ 0.00  0.00 ]
-Key: UnknownTy:  [ 0.00  0.00 ]
+Key: VectorTy:  [ 0.00  0.00 ]
 Key: TokenTy:  [ 0.00  0.00 ]
 Key: IntegerTy:  [ 0.00  0.00 ]
 Key: FunctionTy:  [ 0.00  0.00 ]
 Key: PointerTy:  [ 0.00  0.00 ]
 Key: StructTy:  [ 0.00  0.00 ]
 Key: ArrayTy:  [ 0.00  0.00 ]
-Key: VectorTy:  [ 0.00  0.00 ]
-Key: VectorTy:  [ 0.00  0.00 ]
-Key: PointerTy:  [ 0.00  0.00 ]
 Key: UnknownTy:  [ 0.00  0.00 ]
 Key: Function:  [ 0.00  0.00 ]
 Key: Pointer:  [ 0.00  0.00 ]
diff --git a/llvm/test/Analysis/IR2Vec/basic-flowaware.ll b/llvm/test/Analysis/IR2Vec/basic-flowaware.ll
new file mode 100644
index 0000000..4a7f970
--- /dev/null
+++ b/llvm/test/Analysis/IR2Vec/basic-flowaware.ll
@@ -0,0 +1,72 @@
+; RUN: opt -passes='print<ir2vec>' -ir2vec-kind=flow-aware -o /dev/null -ir2vec-vocab-path=%S/Inputs/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=3D-CHECK-OPC
+; RUN: opt -passes='print<ir2vec>' -ir2vec-kind=flow-aware -o /dev/null -ir2vec-vocab-path=%S/Inputs/dummy_3D_nonzero_type_vocab.json %s 2>&1 | FileCheck %s -check-prefix=3D-CHECK-TYPE
+; RUN: opt -passes='print<ir2vec>' -ir2vec-kind=flow-aware -o /dev/null -ir2vec-vocab-path=%S/Inputs/dummy_3D_nonzero_arg_vocab.json %s 2>&1 | FileCheck %s -check-prefix=3D-CHECK-ARG
+
+define dso_local noundef float @_Z3abcif(i32 noundef %a, float noundef %b) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca float, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store float %b, ptr %b.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
+  %1 = load i32, ptr %a.addr, align 4
+  %mul = mul nsw i32 %0, %1
+  %conv = sitofp i32 %mul to float
+  %2 = load float, ptr %b.addr, align 4
+  %add = fadd float %conv, %2
+  ret float %add
+}
+
+; 3D-CHECK-OPC: IR2Vec embeddings for function _Z3abcif:
+; 3D-CHECK-OPC-NEXT: Function vector: [ 3630.00 3672.00 3714.00 ]
+; 3D-CHECK-OPC-NEXT: Basic block vectors:
+; 3D-CHECK-OPC-NEXT: Basic block: entry:
+; 3D-CHECK-OPC-NEXT:  [ 3630.00 3672.00 3714.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction vectors:
+; 3D-CHECK-OPC-NEXT: Instruction:   %a.addr = alloca i32, align 4 [ 91.00  92.00  93.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   %b.addr = alloca float, align 4 [ 91.00  92.00  93.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   store i32 %a, ptr %a.addr, align 4 [ 188.00 190.00 192.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   store float %b, ptr %b.addr, align 4 [ 188.00 190.00 192.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   %0 = load i32, ptr %a.addr, align 4 [ 185.00 187.00 189.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   %1 = load i32, ptr %a.addr, align 4 [ 185.00 187.00 189.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   %mul = mul nsw i32 %0, %1 [ 419.00  424.00  429.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   %conv = sitofp i32 %mul to float [ 549.00  555.00  561.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   %2 = load float, ptr %b.addr, align 4 [ 185.00  187.00  189.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   %add = fadd float %conv, %2 [ 774.00  783.00  792.00 ]
+; 3D-CHECK-OPC-NEXT: Instruction:   ret float %add [ 775.00  785.00  795.00 ]
+
+; 3D-CHECK-TYPE: IR2Vec embeddings for function _Z3abcif:
+; 3D-CHECK-TYPE-NEXT: Function vector:  [ 355.50  376.50  397.50 ]
+; 3D-CHECK-TYPE-NEXT: Basic block vectors:
+; 3D-CHECK-TYPE-NEXT: Basic block: entry:
+; 3D-CHECK-TYPE-NEXT:  [ 355.50  376.50  397.50 ]
+; 3D-CHECK-TYPE-NEXT: Instruction vectors:
+; 3D-CHECK-TYPE-NEXT: Instruction:   %a.addr = alloca i32, align 4 [ 12.50  13.00  13.50 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   %b.addr = alloca float, align 4 [ 12.50  13.00  13.50 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   store i32 %a, ptr %a.addr, align 4 [ 14.50  15.50  16.50 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   store float %b, ptr %b.addr, align 4 [ 14.50  15.50  16.50 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   %0 = load i32, ptr %a.addr, align 4 [ 22.00  23.00  24.00 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   %1 = load i32, ptr %a.addr, align 4 [ 22.00  23.00  24.00 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   %mul = mul nsw i32 %0, %1 [ 53.50  56.00  58.50 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   %conv = sitofp i32 %mul to float [ 54.00  57.00  60.00 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   %2 = load float, ptr %b.addr, align 4 [ 13.00  14.00  15.00 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   %add = fadd float %conv, %2 [ 67.50  72.00  76.50 ]
+; 3D-CHECK-TYPE-NEXT: Instruction:   ret float %add [ 69.50  74.50  79.50 ]
+
+; 3D-CHECK-ARG: IR2Vec embeddings for function _Z3abcif:
+; 3D-CHECK-ARG-NEXT: Function vector:  [ 27.80  31.60  35.40 ]
+; 3D-CHECK-ARG-NEXT: Basic block vectors:
+; 3D-CHECK-ARG-NEXT: Basic block: entry:
+; 3D-CHECK-ARG-NEXT:  [ 27.80  31.60  35.40 ]
+; 3D-CHECK-ARG-NEXT: Instruction vectors:
+; 3D-CHECK-ARG-NEXT: Instruction:   %a.addr = alloca i32, align 4 [ 1.40  1.60  1.80 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   %b.addr = alloca float, align 4 [ 1.40  1.60  1.80 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   store i32 %a, ptr %a.addr, align 4 [ 3.40  3.80  4.20 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   store float %b, ptr %b.addr, align 4 [ 3.40  3.80  4.20 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   %0 = load i32, ptr %a.addr, align 4 [ 1.40  1.60  1.80 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   %1 = load i32, ptr %a.addr, align 4 [ 1.40  1.60  1.80 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   %mul = mul nsw i32 %0, %1 [ 2.80  3.20  3.60 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   %conv = sitofp i32 %mul to float [ 2.80  3.20  3.60 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   %2 = load float, ptr %b.addr, align 4 [ 1.40  1.60  1.80 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   %add = fadd float %conv, %2 [ 4.20  4.80  5.40 ]
+; 3D-CHECK-ARG-NEXT: Instruction:   ret float %add [ 4.20  4.80  5.40 ]
diff --git a/llvm/test/Analysis/IR2Vec/basic.ll b/llvm/test/Analysis/IR2Vec/basic-symbolic.ll
index cb0544f..35abd3c 100644
--- a/llvm/test/Analysis/IR2Vec/basic.ll
+++ b/llvm/test/Analysis/IR2Vec/basic-symbolic.ll
@@ -1,11 +1,7 @@
 ; RUN: opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=3D-CHECK-OPC
 ; RUN: opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/dummy_3D_nonzero_type_vocab.json %s 2>&1 | FileCheck %s -check-prefix=3D-CHECK-TYPE
 ; RUN: opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/dummy_3D_nonzero_arg_vocab.json %s 2>&1 | FileCheck %s -check-prefix=3D-CHECK-ARG
-; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab1.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB1-CHECK
-; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab2.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB2-CHECK
-; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab3.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB3-CHECK
-; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab4.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB4-CHECK
- 
+
 define dso_local noundef float @_Z3abcif(i32 noundef %a, float noundef %b) #0 {
 entry:
   %a.addr = alloca i32, align 4
@@ -74,11 +70,3 @@ entry:
 ; 3D-CHECK-ARG-NEXT: Instruction:   %2 = load float, ptr %b.addr, align 4 [ 0.80  1.00  1.20 ]
 ; 3D-CHECK-ARG-NEXT: Instruction:   %add = fadd float %conv, %2 [ 4.00  4.40  4.80 ]
 ; 3D-CHECK-ARG-NEXT: Instruction:   ret float %add [ 2.00  2.20  2.40 ]
-
-; INCORRECT-VOCAB1-CHECK: error: Error reading vocabulary: Missing 'Opcodes' section in vocabulary file
-
-; INCORRECT-VOCAB2-CHECK: error: Error reading vocabulary: Missing 'Types' section in vocabulary file
-
-; INCORRECT-VOCAB3-CHECK: error: Error reading vocabulary: Missing 'Arguments' section in vocabulary file
-
-; INCORRECT-VOCAB4-CHECK: error: Error reading vocabulary: Vocabulary sections have different dimensions
diff --git a/llvm/test/Analysis/IR2Vec/basic-vocab.ll b/llvm/test/Analysis/IR2Vec/basic-vocab.ll
new file mode 100644
index 0000000..eeeee83
--- /dev/null
+++ b/llvm/test/Analysis/IR2Vec/basic-vocab.ll
@@ -0,0 +1,27 @@
+; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab1.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB1-CHECK
+; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab2.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB2-CHECK
+; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab3.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB3-CHECK
+; RUN: not opt -passes='print<ir2vec>' -o /dev/null -ir2vec-vocab-path=%S/Inputs/incorrect_vocab4.json %s 2>&1 | FileCheck %s -check-prefix=INCORRECT-VOCAB4-CHECK
+ 
+define dso_local noundef float @_Z3abcif(i32 noundef %a, float noundef %b) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca float, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store float %b, ptr %b.addr, align 4
+  %0 = load i32, ptr %a.addr, align 4
+  %1 = load i32, ptr %a.addr, align 4
+  %mul = mul nsw i32 %0, %1
+  %conv = sitofp i32 %mul to float
+  %2 = load float, ptr %b.addr, align 4
+  %add = fadd float %conv, %2
+  ret float %add
+}
+
+; INCORRECT-VOCAB1-CHECK: error: Error reading vocabulary: Missing 'Opcodes' section in vocabulary file
+
+; INCORRECT-VOCAB2-CHECK: error: Error reading vocabulary: Missing 'Types' section in vocabulary file
+
+; INCORRECT-VOCAB3-CHECK: error: Error reading vocabulary: Missing 'Arguments' section in vocabulary file
+
+; INCORRECT-VOCAB4-CHECK: error: Error reading vocabulary: Vocabulary sections have different dimensions
diff --git a/llvm/test/Analysis/KernelInfo/openmp/nvptx.ll b/llvm/test/Analysis/KernelInfo/openmp/nvptx.ll
index bd46741..da5a898 100644
--- a/llvm/test/Analysis/KernelInfo/openmp/nvptx.ll
+++ b/llvm/test/Analysis/KernelInfo/openmp/nvptx.ll
@@ -417,7 +417,7 @@ define internal noundef range(i32 -1, 1024) i32 @__kmpc_target_init(ptr nofree n
   br label %116
 
 116:                                              ; preds = %110, %128
-  call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 dereferenceable(8) %3) #20
+  call void @llvm.lifetime.start.p0(ptr noundef nonnull align 8 dereferenceable(8) %3) #20
   tail call void @llvm.nvvm.barrier.sync(i32 noundef 8)
   %117 = call zeroext i1 @__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %3) #20
   %118 = load ptr, ptr %3, align 8, !tbaa !93
@@ -446,11 +446,11 @@ define internal noundef range(i32 -1, 1024) i32 @__kmpc_target_init(ptr nofree n
 
 128:                                              ; preds = %126, %120
   tail call void @llvm.nvvm.barrier.sync(i32 noundef 8)
-  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20
+  call void @llvm.lifetime.end.p0(ptr noundef nonnull %3) #20
   br label %116, !llvm.loop !94
 
 129:                                              ; preds = %116
-  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20
+  call void @llvm.lifetime.end.p0(ptr noundef nonnull %3) #20
   br label %130
 
 130:                                              ; preds = %106, %129, %100, %98
@@ -495,7 +495,7 @@ define internal fastcc void @__assert_fail_internal(ptr noundef nonnull derefere
 declare void @llvm.assume(i1 noundef) #9
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #10
+declare void @llvm.lifetime.start.p0(ptr nocapture) #10
 
 ; Function Attrs: convergent nocallback nounwind
 declare void @llvm.nvvm.barrier.sync(i32) #11
@@ -587,7 +587,7 @@ define internal void @__kmpc_kernel_end_parallel() local_unnamed_addr #13 {
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #10
+declare void @llvm.lifetime.end.p0(ptr nocapture) #10
 
 ; Function Attrs: convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite)
 declare extern_weak void @free(ptr allocptr nocapture noundef) local_unnamed_addr #14
@@ -595,11 +595,11 @@ declare extern_weak void @free(ptr allocptr nocapture noundef) local_unnamed_add
 ; Function Attrs: convergent mustprogress nounwind
 define internal noundef i32 @_ZN4ompx6printfEPKcz(ptr noundef %0, ...) local_unnamed_addr #15 {
   %2 = alloca ptr, align 8
-  call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 %2) #29
+  call void @llvm.lifetime.start.p0(ptr noundef nonnull align 8 %2) #29
   call void @llvm.va_start.p0(ptr noundef nonnull align 8 %2) #27
   %3 = load ptr, ptr %2, align 8, !tbaa !101
   %4 = call i32 @vprintf(ptr noundef %0, ptr noundef %3) #24
-  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %2) #20
+  call void @llvm.lifetime.end.p0(ptr noundef nonnull %2) #20
   ret i32 %4
 }
 
@@ -641,7 +641,7 @@ define internal void @__kmpc_target_deinit() #4 {
   br i1 %14, label %15, label %27
 
 15:                                               ; preds = %11
-  call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 dereferenceable(8) %1) #29
+  call void @llvm.lifetime.start.p0(ptr noundef nonnull align 8 dereferenceable(8) %1) #29
   %16 = call zeroext i1 @__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %1) #20
   %17 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62
   %18 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83
@@ -659,7 +659,7 @@ define internal void @__kmpc_target_deinit() #4 {
 
 26:                                               ; preds = %15
   tail call void @llvm.assume(i1 noundef %23) #23
-  call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %1) #20
+  call void @llvm.lifetime.end.p0(ptr noundef nonnull %1) #20
   br label %27
 
 27:                                               ; preds = %26, %11, %10, %0
diff --git a/llvm/test/Analysis/LazyValueAnalysis/invalidation.ll b/llvm/test/Analysis/LazyValueAnalysis/invalidation.ll
index 71ea5d2..0ad1a33 100644
--- a/llvm/test/Analysis/LazyValueAnalysis/invalidation.ll
+++ b/llvm/test/Analysis/LazyValueAnalysis/invalidation.ll
@@ -17,13 +17,13 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [8 x i8] c"a = %l\0A\00", align 1
 
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.start.p0(ptr nocapture)
 
 declare void @hoo(ptr)
 
 declare i32 @printf(ptr nocapture readonly, ...)
 
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 
 define void @goo(i32 %N, ptr %b) {
 entry:
@@ -38,12 +38,12 @@ for.cond:                                         ; preds = %for.body, %entry
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  call void @llvm.lifetime.start.p0(i64 8, ptr %tmp)
+  call void @llvm.lifetime.start.p0(ptr %tmp)
   call void @hoo(ptr %a.i)
   call void @hoo(ptr %c)
   %tmp1 = load volatile i64, ptr %a.i, align 8
   %call.i = call i32 (ptr, ...) @printf(ptr @.str, i64 %tmp1)
-  call void @llvm.lifetime.end.p0(i64 8, ptr %tmp)
+  call void @llvm.lifetime.end.p0(ptr %tmp)
   %inc = add nsw i32 %i.0, 1
   br label %for.cond
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
index 1bf8048..023a8c0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -262,3 +262,301 @@ loop:
 exit:
   ret void
 }
+
+;       i16       i32
+; [ . . 0 0 . . 1 1] [ 1 1 0 0 . . 1 1 ]
+;  ^~~^ gep i8 = 1
+;  ^ ~~ ^ iv.2 = iv + 2
+;       ^ ~~~~~ ^ dependence distance = 4
+;              ^ ~~~~~~~~~~~~~~~~~ ^ 8
+;       ^ ~~~~~~~~~~~~~~~~ ^ 8
+;   ^ ~~~~~~~~~~~~~~~~ ^ iv.next = iv + 8
+;
+; Measurements are in bytes.
+;
+; TODO: Relax the HasSameSize check; the strided accesses are
+; independent, as determined by both the source size and the sink size.
+; This test should report no dependencies.
+define void @different_type_sizes_strided_accesses_independent(ptr %dst) {
+; CHECK-LABEL: 'different_type_sizes_strided_accesses_independent'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            store i16 0, ptr %gep.iv, align 2 ->
+; CHECK-NEXT:            store i32 1, ptr %gep.4.iv, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %gep.4 = getelementptr nuw i8, ptr %dst, i64 4
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.2 = add nuw nsw i64 %iv, 2
+  %gep.iv = getelementptr i8, ptr %dst, i64 %iv.2
+  store i16 0, ptr %gep.iv
+  %gep.4.iv = getelementptr i8, ptr %gep.4, i64 %iv.2
+  store i32 1, ptr %gep.4.iv
+  %iv.next = add nuw nsw i64 %iv, 8
+  %ec = icmp eq i64 %iv.next, 64
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+
+;     i16      i64
+; [ . 0 0 . 1 1 1 1] [ 1 x x 1 1 1 1 1 ]
+;  ^~~^ gep i8 = 1
+;  ^~~^ iv.1 = iv + 1
+;     ^ ~~ ^ dependence distance = 3
+;     ^ ~~~~~~~~~~~~~~~~ ^ 8
+;           ^ ~~~~~~~~~~~~~~~~ ^ 8
+;   ^ ~~~~~~~~~~~~~~~~ ^ iv.next = iv + 8
+;
+; TODO: Relax the HasSameSize check; this test should report a backward
+; loop-carried dependence.
+define void @different_type_sizes_strided_accesses_dependent(ptr %dst) {
+; CHECK-LABEL: 'different_type_sizes_strided_accesses_dependent'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            store i16 0, ptr %gep.iv, align 2 ->
+; CHECK-NEXT:            store i64 1, ptr %gep.3.iv, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %gep.3 = getelementptr nuw i8, ptr %dst, i64 3
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.1 = add nuw nsw i64 %iv, 1
+  %gep.iv = getelementptr i8, ptr %dst, i64 %iv.1
+  store i16 0, ptr %gep.iv
+  %gep.3.iv = getelementptr i8, ptr %gep.3, i64 %iv.1
+  store i64 1, ptr %gep.3.iv
+  %iv.next = add nuw nsw i64 %iv, 8
+  %ec = icmp eq i64 %iv.next, 64
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Variant of the above, where the store size exceeds the dependence
+; distance.
+define void @different_type_sizes_strided_accesses_store_size_exceeds_depdist(ptr %dst) {
+; CHECK-LABEL: 'different_type_sizes_strided_accesses_store_size_exceeds_depdist'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            store i16 0, ptr %gep.iv, align 2 ->
+; CHECK-NEXT:            store i128 1, ptr %gep.10.iv, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %gep.10 = getelementptr nuw i8, ptr %dst, i64 10
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.iv = getelementptr i8, ptr %dst, i64 %iv
+  store i16 0, ptr %gep.iv
+  %gep.10.iv = getelementptr i8, ptr %gep.10, i64 %iv
+  store i128 1, ptr %gep.10.iv
+  %iv.next = add i64 %iv, 8
+  %ec = icmp eq i64 %iv.next, 64
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+
+; Source type-size differs from that of the sink, but when
+; determining backward dependence, only the source size
+; is relevant.
+; TODO: Relax the HasSameSize check; this test should report
+; BackwardVectorizable.
+define void @different_type_sizes_source_size_backwardvectorizible(ptr %dst) {
+; CHECK-LABEL: 'different_type_sizes_source_size_backwardvectorizible'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            store i16 0, ptr %gep.iv, align 2 ->
+; CHECK-NEXT:            store i32 1, ptr %gep.10.iv, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %gep.10 = getelementptr nuw i8, ptr %dst, i64 10
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.iv = getelementptr i8, ptr %dst, i64 %iv
+  store i16 0, ptr %gep.iv
+  %gep.10.iv = getelementptr i8, ptr %gep.10, i64 %iv
+  store i32 1, ptr %gep.10.iv
+  %iv.next = add i64 %iv, 8
+  %ec = icmp eq i64 %iv.next, 64
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Source type-size differs from that of the sink, and when
+; determining forward dependence, the source size can
+; prevent forwarding.
+define void @different_type_sizes_forward(ptr %dst) {
+; CHECK-LABEL: 'different_type_sizes_forward'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Forward:
+; CHECK-NEXT:            store i32 0, ptr %gep.10.iv, align 4 ->
+; CHECK-NEXT:            store i16 1, ptr %gep.iv, align 2
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %gep.10 = getelementptr nuw i8, ptr %dst, i64 10
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.10.iv = getelementptr i8, ptr %gep.10, i64 %iv
+  store i32 0, ptr %gep.10.iv
+  %gep.iv = getelementptr i8, ptr %dst, i64 %iv
+  store i16 1, ptr %gep.iv
+  %iv.next = add i64 %iv, 8
+  %ec = icmp eq i64 %iv.next, 64
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Same as the above, but here, the store size should not prevent
+; ld->st forwarding.
+; TODO: Relax the HasSameSize check; this test should report a
+; forward dependence.
+define void @different_type_sizes_store_size_cannot_prevent_forwarding(ptr %A, ptr noalias %B) {
+; CHECK-LABEL: 'different_type_sizes_store_size_cannot_prevent_forwarding'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Forward loop carried data dependence that prevents store-to-load forwarding.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        ForwardButPreventsForwarding:
+; CHECK-NEXT:            store i32 0, ptr %gep.A, align 4 ->
+; CHECK-NEXT:            %l = load i16, ptr %gep.A.1, align 2
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %A.1 = getelementptr i32, ptr %A, i64 1
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 1022, %entry ], [ %iv.next, %loop ]
+  %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+  store i32 0, ptr %gep.A
+  %gep.A.1 = getelementptr i32, ptr %A.1, i64 %iv
+  %l = load i16, ptr %gep.A.1
+  store i16 %l, ptr %B
+  %iv.next = add nsw i64 %iv, -1
+  %cmp = icmp eq i64 %iv, 0
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Same as the above, but here, the load size prevents
+; ld->st forwarding.
+define void @different_type_sizes_load_size_prevents_forwarding(ptr %A, ptr noalias %B) {
+; CHECK-LABEL: 'different_type_sizes_load_size_prevents_forwarding'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Forward loop carried data dependence that prevents store-to-load forwarding.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        ForwardButPreventsForwarding:
+; CHECK-NEXT:            store i16 0, ptr %gep.A, align 2 ->
+; CHECK-NEXT:            %l = load i32, ptr %gep.A.1, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %A.1 = getelementptr i32, ptr %A, i64 1
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 1022, %entry ], [ %iv.next, %loop ]
+  %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+  store i16 0, ptr %gep.A
+  %gep.A.1 = getelementptr i32, ptr %A.1, i64 %iv
+  %l = load i32, ptr %gep.A.1
+  store i32 %l, ptr %B
+  %iv.next = add nsw i64 %iv, -1
+  %cmp = icmp eq i64 %iv, 0
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-max-dependences.ll b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-max-dependences.ll
new file mode 100644
index 0000000..5b17920
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-max-dependences.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck --check-prefix=DEFAULT %s
+; RUN: opt -passes='print<access-info>' -disable-output -max-dependences=5 %s 2>&1 | FileCheck --check-prefix=MAXDEP5 %s
+
+define void @dependences_with_strides(ptr %dst, i64 %stride) {
+; DEFAULT-LABEL: 'dependences_with_strides'
+; DEFAULT-NEXT:    loop:
+; DEFAULT-NEXT:      Memory dependences are safe with run-time checks
+; DEFAULT-NEXT:      Dependences:
+; DEFAULT-NEXT:      Run-time memory checks:
+; DEFAULT-NEXT:      Check 0:
+; DEFAULT-NEXT:        Comparing group GRP0:
+; DEFAULT-NEXT:          %ptr.iv = phi ptr [ %dst, %entry ], [ %ptr.iv.next, %loop ]
+; DEFAULT-NEXT:        Against group GRP1:
+; DEFAULT-NEXT:          %gep.stride = getelementptr i16, ptr %ptr.iv, i64 %stride
+; DEFAULT-NEXT:      Check 1:
+; DEFAULT-NEXT:        Comparing group GRP0:
+; DEFAULT-NEXT:          %ptr.iv = phi ptr [ %dst, %entry ], [ %ptr.iv.next, %loop ]
+; DEFAULT-NEXT:        Against group GRP2:
+; DEFAULT-NEXT:        ptr %dst
+; DEFAULT-NEXT:      Check 2:
+; DEFAULT-NEXT:        Comparing group GRP1:
+; DEFAULT-NEXT:          %gep.stride = getelementptr i16, ptr %ptr.iv, i64 %stride
+; DEFAULT-NEXT:        Against group GRP2:
+; DEFAULT-NEXT:        ptr %dst
+; DEFAULT-NEXT:      Grouped accesses:
+; DEFAULT-NEXT:        Group GRP0:
+; DEFAULT-NEXT:          (Low: %dst High: (4 + %dst))
+; DEFAULT-NEXT:            Member: {%dst,+,2}<nw><%loop>
+; DEFAULT-NEXT:        Group GRP1:
+; DEFAULT-NEXT:          (Low: ((2 * %stride) + %dst) High: (4 + (2 * %stride) + %dst))
+; DEFAULT-NEXT:            Member: {((2 * %stride) + %dst),+,2}<nw><%loop>
+; DEFAULT-NEXT:        Group GRP2:
+; DEFAULT-NEXT:          (Low: %dst High: (2 + %dst))
+; DEFAULT-NEXT:            Member: %dst
+; DEFAULT-EMPTY:
+; DEFAULT-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; DEFAULT-NEXT:      SCEV assumptions:
+; DEFAULT-EMPTY:
+; DEFAULT-NEXT:      Expressions re-written:
+;
+; MAXDEP5-LABEL: 'dependences_with_strides'
+; MAXDEP5-NEXT:    loop:
+; MAXDEP5-NEXT:      Memory dependences are safe with run-time checks
+; MAXDEP5-NEXT:      Too many dependences, not recorded
+; MAXDEP5-NEXT:      Run-time memory checks:
+; MAXDEP5-NEXT:      Check 0:
+; MAXDEP5-NEXT:        Comparing group GRP0:
+; MAXDEP5-NEXT:          %ptr.iv = phi ptr [ %dst, %entry ], [ %ptr.iv.next, %loop ]
+; MAXDEP5-NEXT:        Against group GRP1:
+; MAXDEP5-NEXT:          %gep.stride = getelementptr i16, ptr %ptr.iv, i64 %stride
+; MAXDEP5-NEXT:      Check 1:
+; MAXDEP5-NEXT:        Comparing group GRP0:
+; MAXDEP5-NEXT:          %ptr.iv = phi ptr [ %dst, %entry ], [ %ptr.iv.next, %loop ]
+; MAXDEP5-NEXT:        Against group GRP2:
+; MAXDEP5-NEXT:        ptr %dst
+; MAXDEP5-NEXT:      Check 2:
+; MAXDEP5-NEXT:        Comparing group GRP1:
+; MAXDEP5-NEXT:          %gep.stride = getelementptr i16, ptr %ptr.iv, i64 %stride
+; MAXDEP5-NEXT:        Against group GRP2:
+; MAXDEP5-NEXT:        ptr %dst
+; MAXDEP5-NEXT:      Grouped accesses:
+; MAXDEP5-NEXT:        Group GRP0:
+; MAXDEP5-NEXT:          (Low: %dst High: (4 + %dst))
+; MAXDEP5-NEXT:            Member: {%dst,+,2}<nw><%loop>
+; MAXDEP5-NEXT:        Group GRP1:
+; MAXDEP5-NEXT:          (Low: ((2 * %stride) + %dst) High: (4 + (2 * %stride) + %dst))
+; MAXDEP5-NEXT:            Member: {((2 * %stride) + %dst),+,2}<nw><%loop>
+; MAXDEP5-NEXT:        Group GRP2:
+; MAXDEP5-NEXT:          (Low: %dst High: (2 + %dst))
+; MAXDEP5-NEXT:            Member: %dst
+; MAXDEP5-EMPTY:
+; MAXDEP5-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; MAXDEP5-NEXT:      SCEV assumptions:
+; MAXDEP5-EMPTY:
+; MAXDEP5-NEXT:      Expressions re-written:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %ptr.iv = phi ptr [ %dst, %entry ], [ %ptr.iv.next, %loop ]
+  store i16 0, ptr %ptr.iv, align 2
+  %gep.stride = getelementptr i16, ptr %ptr.iv, i64 %stride
+  store i16 0, ptr %gep.stride, align 2
+  store i16 0, ptr %dst, align 2
+  store i16 0, ptr %ptr.iv, align 2
+  %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 2
+  %iv.next = add i32 %iv, 1
+  %exitcond.not = icmp eq i32 %iv, 1
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
index ffa5b3c..4dde864 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
@@ -61,3 +61,29 @@ vector.body:
 end:
   ret void
 }
+
+; CHECK-LABEL: 'regression_test_is_no_wrap_access_scalable_typesize'
+; CHECK: LAA: Found an analyzable loop: loop
+; CHECK: LAA: Bad stride - Scalable object: <vscale x 4 x i32>
+define void @regression_test_is_no_wrap_access_scalable_typesize(ptr %ptr_a, i64 %n, ptr %ptr_b) {
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %2 = shl i64 %iv, 1
+  %3 = add i64 %2, %n
+  %4 = trunc i64 %iv to i32
+  %5 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %4, i64 0
+  %6 = getelementptr i32, ptr %ptr_a, i64 %3
+  store <vscale x 4 x i32> %5, ptr %6, align 4
+  %.reass3 = or i32 %4, 1
+  %7 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %.reass3, i64 0
+  %8 = shufflevector <vscale x 4 x i32> %7, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
+  %9 = getelementptr i32, ptr %ptr_b, i64 %3
+  store <vscale x 4 x i32> %8, ptr %9, align 4
+  %iv.next = add i64 %iv, 1
+  %.not = icmp eq i64 %iv, 16
+  br i1 %.not, label %end, label %loop
+end:
+  ret void
+}
diff --git a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll
index 18d2459..03b6768 100644
--- a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll
+++ b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll
@@ -9,8 +9,8 @@ entry:
   %P = alloca [32 x i8]
   %Q = call ptr @obscure(ptr %P)
 ; CHECK:  1 = MemoryDef(liveOnEntry)
-; CHECK-NEXT:   call void @llvm.lifetime.start.p0(i64 32, ptr %P)
-  call void @llvm.lifetime.start.p0(i64 32, ptr %P)
+; CHECK-NEXT:   call void @llvm.lifetime.start.p0(ptr %P)
+  call void @llvm.lifetime.start.p0(ptr %P)
 ; CHECK:  MemoryUse(1)
 ; CHECK-NEXT:   %0 = load i8, ptr %P
   %0 = load i8, ptr %P
@@ -18,8 +18,8 @@ entry:
 ; CHECK-NEXT:   store i8 1, ptr %P
   store i8 1, ptr %P
 ; CHECK:  3 = MemoryDef(2)
-; CHECK-NEXT:   call void @llvm.lifetime.end.p0(i64 32, ptr %P)
-  call void @llvm.lifetime.end.p0(i64 32, ptr %P)
+; CHECK-NEXT:   call void @llvm.lifetime.end.p0(ptr %P)
+  call void @llvm.lifetime.end.p0(ptr %P)
 ; CHECK:  MemoryUse(3)
 ; CHECK-NEXT:   %1 = load i8, ptr %P
   %1 = load i8, ptr %P
@@ -28,5 +28,5 @@ entry:
   %2 = load i8, ptr %Q
   ret i8 %1
 }
-declare void @llvm.lifetime.start.p0(i64 %S, ptr nocapture %P) readonly
-declare void @llvm.lifetime.end.p0(i64 %S, ptr nocapture %P)
+declare void @llvm.lifetime.start.p0(ptr nocapture %P) readonly
+declare void @llvm.lifetime.end.p0(ptr nocapture %P)
diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll
index b824481..22bbead 100644
--- a/llvm/test/Analysis/MemorySSA/phi-translation.ll
+++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll
@@ -465,7 +465,7 @@ end:                                          ; preds = %for.body
 define void @use_clobbered_by_def_in_loop() {
 entry:
   %nodeStack = alloca [12 x i32], align 4
-  call void @llvm.lifetime.start.p0(i64 48, ptr nonnull %nodeStack)
+  call void @llvm.lifetime.start.p0(ptr nonnull %nodeStack)
   br i1 false, label %cleanup, label %while.cond
 
 ; CHECK-LABEL: while.cond:
@@ -502,12 +502,12 @@ while.end:                                        ; preds = %while.cond, %land.r
   br i1 true, label %cleanup, label %while.cond.backedge
 
 cleanup:                                          ; preds = %while.body, %while.end, %entry
-  call void @llvm.lifetime.end.p0(i64 48, ptr nonnull %nodeStack)
+  call void @llvm.lifetime.end.p0(ptr nonnull %nodeStack)
   ret void
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.start.p0(ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 
 define void @another_loop_clobber_inc() {
 ; CHECK-LABEL: void @another_loop_clobber_inc
diff --git a/llvm/test/Analysis/MemorySSA/pr43044.ll b/llvm/test/Analysis/MemorySSA/pr43044.ll
index bd767d3..7ae02f3 100644
--- a/llvm/test/Analysis/MemorySSA/pr43044.ll
+++ b/llvm/test/Analysis/MemorySSA/pr43044.ll
@@ -4,7 +4,7 @@
 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 target triple = "s390x-ibm-linux"
 
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 
 ; CHECK-LABEL: @func_42()
 define void @func_42() {
diff --git a/llvm/test/Analysis/MemorySSA/pr49859.ll b/llvm/test/Analysis/MemorySSA/pr49859.ll
index 25ef586..0e97f57 100644
--- a/llvm/test/Analysis/MemorySSA/pr49859.ll
+++ b/llvm/test/Analysis/MemorySSA/pr49859.ll
@@ -11,12 +11,12 @@ entry:
   %n = alloca i8, align 1
   %i = alloca i8, align 1
   %cleanup.dest.slot = alloca i32, align 1
-  call  void @llvm.lifetime.start.p0(i64 1, ptr %sum) #3
+  call  void @llvm.lifetime.start.p0(ptr %sum) #3
   store i8 0, ptr %sum, align 1
-  call  void @llvm.lifetime.start.p0(i64 1, ptr %n) #3
+  call  void @llvm.lifetime.start.p0(ptr %n) #3
   %call = call  i8 @idi(i8 10)
   store i8 %call, ptr %n, align 1
-  call  void @llvm.lifetime.start.p0(i64 1, ptr %i) #3
+  call  void @llvm.lifetime.start.p0(ptr %i) #3
   store i8 0, ptr %i, align 1
   br label %for.cond
 
@@ -61,9 +61,9 @@ for.inc:                                          ; preds = %if.end
 ; CHECK: final.cleanup:
 ; CHECK-NEXT: ; [[NO20:.*]] = MemoryPhi({if.then,[[NO9:.*]]},{for.cond.cleanup,[[NO8:.*]]})
 ; CHECK-NEXT: ; [[NO12:.*]] = MemoryDef([[NO20]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr %i)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr %i)
 final.cleanup:                                          ; preds = %if.then, %for.cond.cleanup
-  call  void @llvm.lifetime.end.p0(i64 1, ptr %i) #3
+  call  void @llvm.lifetime.end.p0(ptr %i) #3
   br label %for.end
 
 ; CHECK: for.end:
@@ -71,23 +71,23 @@ final.cleanup:                                          ; preds = %if.then, %for
 ; CHECK-NEXT:  %3 = load i8, ptr %sum, align 1
 for.end:                                          ; preds = %final.cleanup
   %8 = load i8, ptr %sum, align 1
-  call  void @llvm.lifetime.start.p0(i64 1, ptr %res.addr.i)
+  call  void @llvm.lifetime.start.p0(ptr %res.addr.i)
   store i8 %8, ptr %res.addr.i, align 1
   %9 = load i8, ptr %res.addr.i, align 1
   call  void @foo(i8 %9) #3
-  call  void @llvm.lifetime.end.p0(i64 1, ptr %res.addr.i)
-  call  void @llvm.lifetime.end.p0(i64 1, ptr %n) #3
-  call  void @llvm.lifetime.end.p0(i64 1, ptr %sum) #3
+  call  void @llvm.lifetime.end.p0(ptr %res.addr.i)
+  call  void @llvm.lifetime.end.p0(ptr %n) #3
+  call  void @llvm.lifetime.end.p0(ptr %sum) #3
   ret void
 }
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)  #1
+declare void @llvm.lifetime.start.p0(ptr nocapture)  #1
 
 declare i8 @idi(i8)
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)  #1
+declare void @llvm.lifetime.end.p0(ptr nocapture)  #1
 
 ; Function Attrs: nounwind
 declare void @foo(i8)
diff --git a/llvm/test/Analysis/MemorySSA/renamephis.ll b/llvm/test/Analysis/MemorySSA/renamephis.ll
index e297b99..a731ef1 100644
--- a/llvm/test/Analysis/MemorySSA/renamephis.ll
+++ b/llvm/test/Analysis/MemorySSA/renamephis.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
 declare void @g()
 
 ; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
+declare void @llvm.lifetime.end.p0(ptr nocapture) #0
 
 ; CHECK-LABEL: @f
 define void @f(i1 %arg) align 2 {
diff --git a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll
index 39b475d..7120eec 100644
--- a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll
+++ b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll
@@ -50,7 +50,7 @@ define i32 @d(i32 %base) {
 ;
 entry:
   %e = alloca [1 x [1 x i8]], align 1
-  call void @llvm.lifetime.start.p0(i64 1, ptr %e) #2
+  call void @llvm.lifetime.start.p0(ptr %e) #2
   br label %for.cond
 
 for.cond:                                         ; preds = %for.cond, %entry
@@ -69,4 +69,4 @@ for.cond:                                         ; preds = %for.cond, %entry
   br label %for.cond
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.start.p0(ptr nocapture)
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
new file mode 100644
index 0000000..75014f3
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes='print<scalar-evolution>' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s
+
+define void @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr(ptr %start, ptr %end) {
+; CHECK-LABEL: 'ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr'
+; CHECK-NEXT:  Determining loop execution counts for: @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr
+; CHECK-NEXT:  Loop %loop: backedge-taken count is i64 0
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 0
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is i64 0
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
+;
+entry:
+  %end.i = ptrtoint ptr %end to i64
+  %start.i = ptrtoint ptr %start to i64
+  %sub = sub i64 %end.i, %start.i
+  %pre.1 = icmp eq i64 %sub, 4
+  call void @llvm.assume(i1 %pre.1)
+  br label %loop
+
+loop:
+  %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
+  store i32 0, ptr %iv
+  %iv.next = getelementptr inbounds nuw i8, ptr %iv, i64 4
+  %ec = icmp eq ptr %iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll
new file mode 100644
index 0000000..951b072
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-apply-to-adds.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes='print<scalar-evolution>' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s
+
+define void @max_btc_improved_by_applying_guards_to_add_subexpr(i32 %low, i32 %high) {
+; CHECK-LABEL: 'max_btc_improved_by_applying_guards_to_add_subexpr'
+; CHECK-NEXT:  Determining loop execution counts for: @max_btc_improved_by_applying_guards_to_add_subexpr
+; CHECK-NEXT:  Loop %loop: backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64))<nsw>
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 7
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64))<nsw>
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
+;
+entry:
+  %sub = sub i32 %high, %low
+  %pre.1 = icmp slt i32 %sub, 8
+  br i1 %pre.1, label %if.then, label %exit
+
+if.then:
+  %pre.2 = icmp slt i32 %sub, 0
+  br i1 %pre.2, label %exit, label %ph
+
+ph:
+  %add.1 = add i32 %sub, 1
+  %wide.trip.count = zext i32 %add.1 to i64
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %wide.trip.count
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 9bf2427..7cdf3a2 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -1231,7 +1231,7 @@ define void @optimized_range_check_unsigned3(ptr %pred, i1 %c) {
 ; CHECK-NEXT:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,3) S: [0,3) Exits: (-1 + %N)<nsw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep = getelementptr inbounds i16, ptr %pred, i32 %iv
-; CHECK-NEXT:    --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N)<nsw> to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((zext i32 (-2 + (2 * %N)<nuw><nsw>)<nsw> to i64) + %pred) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw nsw i32 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,4) S: [1,4) Exits: %N LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @optimized_range_check_unsigned3
diff --git a/llvm/test/Analysis/ScalarEvolution/sdiv.ll b/llvm/test/Analysis/ScalarEvolution/sdiv.ll
index 9eaaf8b..acc6ab0 100644
--- a/llvm/test/Analysis/ScalarEvolution/sdiv.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sdiv.ll
@@ -38,7 +38,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
 entry:
   %storage = alloca [2 x i32], align 4
   %0 = bitcast ptr %storage to ptr
-  call void @llvm.lifetime.start.p0(i64 8, ptr %storage) #4
+  call void @llvm.lifetime.start.p0(ptr %storage) #4
   call void @llvm.memset.p0.i64(ptr align 4 %0, i8 0, i64 8, i1 false)
   br label %for.cond
 
@@ -48,7 +48,7 @@ for.cond:
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
 for.cond.cleanup:
-  call void @llvm.lifetime.end.p0(i64 8, ptr %storage) #4
+  call void @llvm.lifetime.end.p0(ptr %storage) #4
   ret void
 
 for.body:
@@ -64,10 +64,10 @@ for.body:
   br label %for.cond
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
 
 declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
 
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+declare void @llvm.lifetime.end.p0(ptr nocapture) #1
diff --git a/llvm/test/Analysis/ScalarEvolution/srem.ll b/llvm/test/Analysis/ScalarEvolution/srem.ll
index 377e58a..9d4538f 100644
--- a/llvm/test/Analysis/ScalarEvolution/srem.ll
+++ b/llvm/test/Analysis/ScalarEvolution/srem.ll
@@ -38,7 +38,7 @@ define dso_local void @_Z4loopi(i32 %width) local_unnamed_addr #0 {
 entry:
   %storage = alloca [2 x i32], align 4
   %0 = bitcast ptr %storage to ptr
-  call void @llvm.lifetime.start.p0(i64 8, ptr %storage) #4
+  call void @llvm.lifetime.start.p0(ptr %storage) #4
   call void @llvm.memset.p0.i64(ptr align 4 %0, i8 0, i64 8, i1 false)
   br label %for.cond
 
@@ -48,7 +48,7 @@ for.cond:
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 
 for.cond.cleanup:
-  call void @llvm.lifetime.end.p0(i64 8, ptr %storage) #4
+  call void @llvm.lifetime.end.p0(ptr %storage) #4
   ret void
 
 for.body:
@@ -64,10 +64,10 @@ for.body:
   br label %for.cond
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
 
 declare dso_local i32 @_Z3adji(i32) local_unnamed_addr #3
 
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+declare void @llvm.lifetime.end.p0(ptr nocapture) #1
diff --git a/llvm/test/Analysis/ScalarEvolutionDivision/sdiv.ll b/llvm/test/Analysis/ScalarEvolutionDivision/sdiv.ll
new file mode 100644
index 0000000..e3cf15e
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolutionDivision/sdiv.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s "-passes=print<scev-division>" -disable-output 2>&1 | FileCheck %s
+
+define noundef i8 @add(i8 %x, i8 %y) {
+; CHECK-LABEL: 'add'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %add, %y
+; CHECK-NEXT:    Numerator: (%x + %y)
+; CHECK-NEXT:    Denominator: %y
+; CHECK-NEXT:    Quotient: 1
+; CHECK-NEXT:    Remainder: %x
+;
+  %add = add i8 %x, %y
+  %div = sdiv i8 %add, %y
+  ret i8 %div
+}
+
+define noundef i8 @mul_add_mul(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: 'mul_add_mul'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %add, %a
+; CHECK-NEXT:    Numerator: ((2 * %c)<nuw><nsw> + (%a * %b)<nuw><nsw>)<nuw><nsw>
+; CHECK-NEXT:    Denominator: %a
+; CHECK-NEXT:    Quotient: %b
+; CHECK-NEXT:    Remainder: (2 * %c)<nuw><nsw>
+;
+  %mul0 = mul nsw nuw i8 %a, %b
+  %mul1 = mul nsw nuw i8 %c, 2
+  %add = add nsw nuw i8 %mul0, %mul1
+  %div = sdiv i8 %add, %a
+  ret i8 %div
+}
+
+define noundef i8 @mul(i8 %x, i8 %y) {
+; CHECK-LABEL: 'mul'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %mul, %y
+; CHECK-NEXT:    Numerator: (%x * %y)
+; CHECK-NEXT:    Denominator: %y
+; CHECK-NEXT:    Quotient: %x
+; CHECK-NEXT:    Remainder: 0
+;
+  %mul = mul i8 %x, %y
+  %div = sdiv i8 %mul, %y
+  ret i8 %div
+}
+
+define noundef i8 @add_mul_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: 'add_mul_add'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %mul, %a
+; CHECK-NEXT:    Numerator: ((%a + %b)<nuw><nsw> * (%a + %c)<nuw><nsw>)<nuw><nsw>
+; CHECK-NEXT:    Denominator: %a
+; CHECK-NEXT:    Quotient: 0
+; CHECK-NEXT:    Remainder: ((%a + %b)<nuw><nsw> * (%a + %c)<nuw><nsw>)<nuw><nsw>
+;
+  %add0 = add nsw nuw i8 %a, %b
+  %add1 = add nsw nuw i8 %a, %c
+  %mul = mul nsw nuw i8 %add0, %add1
+  %div = sdiv i8 %mul, %a
+  ret i8 %div
+}
+
+; for (i = 0; i < n; i++)
+;   div = i / den;
+;
+define void @addrec_iv(i8 %n, i8 %den) {
+; CHECK-LABEL: 'addrec_iv'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %i, %den
+; CHECK-NEXT:    Numerator: {0,+,1}<nuw><nsw><%loop>
+; CHECK-NEXT:    Denominator: %den
+; CHECK-NEXT:    Quotient: 0
+; CHECK-NEXT:    Remainder: {0,+,1}<nuw><nsw><%loop>
+;
+entry:
+  %guard = icmp sgt i8 %n, 0
+  br i1 %guard, label %loop, label %exit
+
+loop:
+  %i = phi i8 [ 0, %entry ], [ %i.inc, %loop ]
+  %div = sdiv i8 %i, %den
+  %i.inc = add nsw i8 %i, 1
+  %exitcond = icmp eq i8 %i.inc, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; for (i = 0; i < n; i++)
+;   div = (step * i) / step;
+;
+define void @addrec_step0(i8 %n, i8 %step) {
+; CHECK-LABEL: 'addrec_step0'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %num, %step
+; CHECK-NEXT:    Numerator: {0,+,%step}<nuw><nsw><%loop>
+; CHECK-NEXT:    Denominator: %step
+; CHECK-NEXT:    Quotient: {0,+,1}<nuw><nsw><%loop>
+; CHECK-NEXT:    Remainder: 0
+;
+entry:
+  %guard = icmp sgt i8 %n, 0
+  br i1 %guard, label %loop, label %exit
+
+loop:
+  %i = phi i8 [ 0, %entry ], [ %i.inc, %loop ]
+  %num = phi i8 [ 0, %entry ], [ %num.next, %loop ]
+  %div = sdiv i8 %num, %step
+  %i.inc = add nsw i8 %i, 1
+  %num.next = add nsw nuw i8 %num, %step
+  %exitcond = icmp eq i8 %i.inc, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; for (unsigned char i = 0; i < n; i++)
+;   if (cond)
+;     div = (step * i) / step;
+;
+; FIXME: The quotient can cause signed wrap, e.g., when %step is 0 and %n is
+; larger than 127.
+define void @addrec_step1(i8 %n, i8 %step) {
+; CHECK-LABEL: 'addrec_step1'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %num, %step
+; CHECK-NEXT:    Numerator: {0,+,%step}<nuw><nsw><%loop.header>
+; CHECK-NEXT:    Denominator: %step
+; CHECK-NEXT:    Quotient: {0,+,1}<nuw><nsw><%loop.header>
+; CHECK-NEXT:    Remainder: 0
+;
+entry:
+  %guard = icmp ne i8 %n, 0
+  br i1 %guard, label %loop.header, label %exit
+
+loop.header:
+  %i = phi i8 [ 0, %entry ], [ %i.inc, %loop.latch ]
+  %num = phi i8 [ 0, %entry ], [ %num.next, %loop.latch ]
+  %cond = freeze i1 poison
+  br i1 %cond, label %division, label %loop.latch
+
+division:
+  %div = sdiv i8 %num, %step
+  br label %loop.latch
+
+loop.latch:
+  %i.inc = add nuw i8 %i, 1
+  %num.next = add nsw nuw i8 %num, %step
+  %exitcond = icmp eq i8 %i.inc, %n
+  br i1 %exitcond, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+; for (i = 0; i < n; i++)
+;   div = (a + b) * i / a;
+;
+; FIXME: Both the quotient and the remainder can cause signed/unsigned wrap,
+; e.g., when %a + %b = 0 && %a != 0.
+define void @addrec_a_b(i8 %n, i8 %a, i8 %b) {
+; CHECK-LABEL: 'addrec_a_b'
+; CHECK-NEXT:  Instruction: %div = sdiv i8 %num, %step
+; CHECK-NEXT:    Numerator: {0,+,(%a + %b)}<nuw><nsw><%loop>
+; CHECK-NEXT:    Denominator: (%a + %b)
+; CHECK-NEXT:    Quotient: {0,+,1}<nuw><nsw><%loop>
+; CHECK-NEXT:    Remainder: 0
+;
+entry:
+  %guard = icmp sgt i8 %n, 0
+  %step = add nsw nuw i8 %a, %b
+  br i1 %guard, label %loop, label %exit
+
+loop:
+  %i = phi i8 [ 0, %entry ], [ %i.inc, %loop ]
+  %num = phi i8 [ 0, %entry ], [ %num.next, %loop ]
+  %div = sdiv i8 %num, %step
+  %i.inc = add nsw i8 %i, 1
+  %num.next = add nsw nuw i8 %num, %step
+  %exitcond = icmp eq i8 %i.inc, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
index 840a517..36d79f9 100644
--- a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
+++ b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
@@ -8,10 +8,10 @@ define i8 @test(i8 %input) {
   %dst = alloca i8
   %src = alloca i8
 ; CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]]
-  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %src), !noalias !4
+  call void @llvm.lifetime.start.p0(ptr nonnull %src), !noalias !4
   store i8 %input, ptr %src
   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false), !alias.scope !0
-  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %src), !noalias !4
+  call void @llvm.lifetime.end.p0(ptr nonnull %src), !noalias !4
   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 1, i1 false), !alias.scope !4
   %ret_value = load i8, ptr %dst
   call void @use(ptr %src)
@@ -23,8 +23,8 @@ define i8 @test(i8 %input) {
 ; CHECK-DAG: ![[CALLEE0_B:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %b"}
 ; CHECK-DAG: ![[SCOPE]] = !{![[CALLEE0_A]], ![[CALLEE0_B]]}
 
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.start.p0(ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
 
 !0 = !{!1, !7}
diff --git a/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll b/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll
index 6c3dec9..51bfa15 100644
--- a/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll
+++ b/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll
@@ -11,31 +11,31 @@ entry:
 ; CHECK: %y = alloca i32, align 4
 ; CHECK-NEXT: Alive: <>
   %z = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.start.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %z)
 ; CHECK-NEXT: Alive: <z>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x z>
 
   call void @capture32(ptr %x)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <z>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y z>
 
   call void @capture32(ptr %y)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <z>
 
   call void @capture32(ptr %z)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.end.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %z)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -48,13 +48,13 @@ entry:
 ; CHECK-NEXT: Alive: <y>
   %x = alloca i32, align 4
   %y = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x y>
 
   call void @capture32(ptr %x)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <y>
 
   call void @capture32(ptr %y)
@@ -69,31 +69,31 @@ entry:
   %x = alloca i32, align 4
   %y = alloca i32, align 4
   %z = alloca i64, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x y>
 
   call void @capture32(ptr %x)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <y>
 
   call void @capture32(ptr %y)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.start.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %z)
 ; CHECK-NEXT: Alive: <z>
 
   call void @capture64(ptr %z)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.end.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %z)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -111,31 +111,31 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %z = alloca i64, align 4
   %y = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x y>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.start.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %z)
 ; CHECK-NEXT: Alive: <x y z>
 
   call void @capture32(ptr %x)
   call void @capture32(ptr %y)
   call void @capture64(ptr %z)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <y z>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <z>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.end.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %z)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -154,12 +154,12 @@ entry:
   %z = alloca i64, align 8
   %z1 = alloca i64, align 8
   %z2 = alloca i64, align 8
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x1)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x1)
+  call void @llvm.lifetime.start.p0(ptr %x1)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x1)
 ; CHECK-NEXT: Alive: <x1>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x2)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x2)
+  call void @llvm.lifetime.start.p0(ptr %x2)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x2)
 ; CHECK-NEXT: Alive: <x1 x2>
 
   call void @capture64(ptr nonnull %x1)
@@ -171,8 +171,8 @@ entry:
 if.then:                                          ; preds = %entry
 ; CHECK: if.then:
 ; CHECK-NEXT: Alive: <x1 x2>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x1 x2 y>
 
   call void @capture64(ptr nonnull %y)
@@ -181,13 +181,13 @@ if.then:                                          ; preds = %entry
 if.then3:                                         ; preds = %if.then
 ; CHECK: if.then3:
 ; CHECK-NEXT: Alive: <x1 x2 y>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y1)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y1)
+  call void @llvm.lifetime.start.p0(ptr %y1)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y1)
 ; CHECK-NEXT: Alive: <x1 x2 y y1>
 
   call void @capture64(ptr nonnull %y1)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y1)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y1)
+  call void @llvm.lifetime.end.p0(ptr %y1)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y1)
 ; CHECK-NEXT: Alive: <x1 x2 y>
 
   br label %if.end
@@ -195,13 +195,13 @@ if.then3:                                         ; preds = %if.then
 if.else:                                          ; preds = %if.then
 ; CHECK: if.else:
 ; CHECK-NEXT: Alive: <x1 x2 y>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y2)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y2)
+  call void @llvm.lifetime.start.p0(ptr %y2)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y2)
 ; CHECK-NEXT: Alive: <x1 x2 y y2>
 
   call void @capture64(ptr nonnull %y2)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y2)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y2)
+  call void @llvm.lifetime.end.p0(ptr %y2)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y2)
 ; CHECK-NEXT: Alive: <x1 x2 y>
 
   br label %if.end
@@ -209,8 +209,8 @@ if.else:                                          ; preds = %if.then
 if.end:                                           ; preds = %if.else, %if.then3
 ; CHECK: if.end:
 ; CHECK-NEXT: Alive: <x1 x2 y>
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x1 x2>
 
   br label %if.end9
@@ -222,8 +222,8 @@ if.else4:                                         ; preds = %entry
 ; CHECK: %z.cast = bitcast ptr %z to ptr
 ; CHECK-NEXT: Alive: <x1 x2>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.start.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %z)
 ; CHECK-NEXT: Alive: <x1 x2 z>
 
   call void @capture64(ptr nonnull %z)
@@ -232,13 +232,13 @@ if.else4:                                         ; preds = %entry
 if.then6:                                         ; preds = %if.else4
 ; CHECK: if.then6:
 ; CHECK-NEXT: Alive: <x1 x2 z>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %z1)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z1)
+  call void @llvm.lifetime.start.p0(ptr %z1)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %z1)
 ; CHECK-NEXT: Alive: <x1 x2 z z1>
 
   call void @capture64(ptr nonnull %z1)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %z1)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z1)
+  call void @llvm.lifetime.end.p0(ptr %z1)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %z1)
 ; CHECK-NEXT: Alive: <x1 x2 z>
 
   br label %if.end8
@@ -246,13 +246,13 @@ if.then6:                                         ; preds = %if.else4
 if.else7:                                         ; preds = %if.else4
 ; CHECK: if.else7:
 ; CHECK-NEXT: Alive: <x1 x2 z>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %z2)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z2)
+  call void @llvm.lifetime.start.p0(ptr %z2)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %z2)
 ; CHECK-NEXT: Alive: <x1 x2 z z2>
 
   call void @capture64(ptr nonnull %z2)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %z2)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z2)
+  call void @llvm.lifetime.end.p0(ptr %z2)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %z2)
 ; CHECK-NEXT: Alive: <x1 x2 z>
 
   br label %if.end8
@@ -260,8 +260,8 @@ if.else7:                                         ; preds = %if.else4
 if.end8:                                          ; preds = %if.else7, %if.then6
 ; CHECK: if.end8:
 ; CHECK-NEXT: Alive: <x1 x2 z>
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z)
+  call void @llvm.lifetime.end.p0(ptr %z)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %z)
 ; CHECK-NEXT: Alive: <x1 x2>
 
   br label %if.end9
@@ -269,12 +269,12 @@ if.end8:                                          ; preds = %if.else7, %if.then6
 if.end9:                                          ; preds = %if.end8, %if.end
 ; CHECK: if.end9:
 ; CHECK-NEXT: Alive: <x1 x2>
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x2)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x2)
+  call void @llvm.lifetime.end.p0(ptr %x2)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x2)
 ; CHECK-NEXT: Alive: <x1>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x1)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x1)
+  call void @llvm.lifetime.end.p0(ptr %x1)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x1)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -287,8 +287,8 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %x = alloca i32, align 4
   %y = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   call void @capture32(ptr %x)
@@ -297,17 +297,17 @@ entry:
 bb2:                                              ; preds = %entry
 ; CHECK: bb2:
 ; CHECK-NEXT: Alive: <x>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x y>
 
   call void @capture32(ptr %y)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -315,8 +315,8 @@ bb2:                                              ; preds = %entry
 bb3:                                              ; preds = %entry
 ; CHECK: bb3:
 ; CHECK-NEXT: Alive: <x>
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -329,13 +329,13 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %x = alloca i32, align 4
   %y = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   call void @capture32(ptr %x)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
   br i1 %d, label %bb2, label %bb3
@@ -343,13 +343,13 @@ entry:
 bb2:                                              ; preds = %entry
 ; CHECK: bb2:
 ; CHECK-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y>
 
   call void @capture32(ptr %y)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -367,13 +367,13 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %x = alloca i32, align 4
   %y = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   call void @capture32(ptr %x)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
   br i1 %d, label %bb2, label %bb3
@@ -381,8 +381,8 @@ entry:
 bb2:                                              ; preds = %entry
 ; CHECK: bb2:
 ; CHECK-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y>
 
   call void @capture32(ptr %y)
@@ -401,8 +401,8 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %x = alloca i32, align 4
   %y = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   call void @capture32(ptr %x)
@@ -411,12 +411,12 @@ entry:
 bb2:                                              ; preds = %entry
 ; CHECK: bb2:
 ; CHECK-NEXT: Alive: <x>
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y>
 
   call void @capture32(ptr %y)
@@ -436,8 +436,8 @@ entry:
   %x = alloca i32, align 4
   %y = alloca i32, align 4
   call void @capture32(ptr %x)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   br i1 %d, label %bb2, label %bb3
@@ -445,8 +445,8 @@ entry:
 bb2:                                              ; preds = %entry
 ; CHECK: bb2:
 ; CHECK-NEXT: Alive: <x>
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x y>
 
   call void @capture32(ptr %y)
@@ -467,12 +467,12 @@ entry:
   %B.i2 = alloca [100 x i32], align 4
   %A.i = alloca [100 x i32], align 4
   %B.i = alloca [100 x i32], align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i)
+  call void @llvm.lifetime.start.p0(ptr %A.i)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %A.i)
 ; CHECK-NEXT: Alive: <A.i>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i)
+  call void @llvm.lifetime.start.p0(ptr %B.i)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %B.i)
 ; CHECK-NEXT: Alive: <A.i B.i>
 
   call void @capture100x32(ptr %A.i)
@@ -480,30 +480,30 @@ entry:
 ; CHECK-NEXT: Alive: <A.i B.i>
 
   call void @capture100x32(ptr %B.i)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i)
+  call void @llvm.lifetime.end.p0(ptr %A.i)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %A.i)
 ; CHECK-NEXT: Alive: <B.i>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i)
+  call void @llvm.lifetime.end.p0(ptr %B.i)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %B.i)
 ; CHECK-NEXT: Alive: <>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i1)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i1)
+  call void @llvm.lifetime.start.p0(ptr %A.i1)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %A.i1)
 ; CHECK-NEXT: Alive: <A.i1>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i2)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i2)
+  call void @llvm.lifetime.start.p0(ptr %B.i2)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %B.i2)
 ; CHECK-NEXT: Alive: <A.i1 B.i2>
 
   call void @capture100x32(ptr %A.i1)
   call void @capture100x32(ptr %B.i2)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i1)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i1)
+  call void @llvm.lifetime.end.p0(ptr %A.i1)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %A.i1)
 ; CHECK-NEXT: Alive: <B.i2>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i2)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i2)
+  call void @llvm.lifetime.end.p0(ptr %B.i2)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %B.i2)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -516,20 +516,20 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %buf1 = alloca i8, i32 100000, align 16
   %buf2 = alloca i8, i32 100000, align 16
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1)
+  call void @llvm.lifetime.start.p0(ptr %buf1)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %buf1)
 ; CHECK-NEXT: Alive: <buf1>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %buf1)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %buf1)
+  call void @llvm.lifetime.end.p0(ptr %buf1)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %buf1)
 ; CHECK-NEXT: Alive: <>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1)
+  call void @llvm.lifetime.start.p0(ptr %buf1)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %buf1)
 ; CHECK-NEXT: Alive: <buf1>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %buf2)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %buf2)
+  call void @llvm.lifetime.start.p0(ptr %buf2)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %buf2)
 ; CHECK-NEXT: Alive: <buf1 buf2>
 
   call void @capture8(ptr %buf1)
@@ -546,22 +546,22 @@ entry:
   %B.i2 = alloca [100 x i32], align 4
   %A.i = alloca [100 x i32], align 4
   %B.i = alloca [100 x i32], align 4
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i)
+  call void @llvm.lifetime.start.p0(ptr %A.i)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %A.i)
 ; CHECK-NEXT: Alive: <A.i A.i1 B.i2>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i)
+  call void @llvm.lifetime.start.p0(ptr %B.i)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %B.i)
 ; CHECK-NEXT: Alive: <A.i A.i1 B.i B.i2>
 
   call void @capture100x32(ptr %A.i)
   call void @capture100x32(ptr %B.i)
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i)
+  call void @llvm.lifetime.end.p0(ptr %A.i)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %A.i)
 ; CHECK-NEXT: Alive: <A.i1 B.i B.i2>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i)
+  call void @llvm.lifetime.end.p0(ptr %B.i)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %B.i)
 ; CHECK-NEXT: Alive: <A.i1 B.i2>
 
   br label %block2
@@ -583,23 +583,23 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %a.i = alloca [4 x %struct.Klass], align 16
   %b.i = alloca [4 x %struct.Klass], align 16
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %a.i)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %a.i)
+  call void @llvm.lifetime.start.p0(ptr %a.i)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %a.i)
 ; CHECK-NEXT: Alive: <a.i>
 
-  call void @llvm.lifetime.start.p0(i64 -1, ptr %b.i)
-; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %b.i)
+  call void @llvm.lifetime.start.p0(ptr %b.i)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %b.i)
 ; CHECK-NEXT: Alive: <a.i b.i>
 
   call void @capture8(ptr %a.i)
   call void @capture8(ptr %b.i)
   %z3 = load i32, ptr %a.i, align 16
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %a.i)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %a.i)
+  call void @llvm.lifetime.end.p0(ptr %a.i)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %a.i)
 ; CHECK-NEXT: Alive: <b.i>
 
-  call void @llvm.lifetime.end.p0(i64 -1, ptr %b.i)
-; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %b.i)
+  call void @llvm.lifetime.end.p0(ptr %b.i)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %b.i)
 ; CHECK-NEXT: Alive: <>
 
   ret i32 %z3
@@ -611,8 +611,8 @@ entry:
 ; CHECK: entry:
 ; CHECK-NEXT: Alive: <>
   %x = alloca i8, align 4
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   br label %l2
@@ -622,8 +622,8 @@ l2:                                               ; preds = %l2, %entry
 ; MAY-NEXT: Alive: <x>
 ; MUST-NEXT: Alive: <>
   call void @capture8(ptr %x)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
   br label %l2
@@ -636,8 +636,8 @@ entry:
 ; CHECK-NEXT: Alive: <>
   %x = alloca i8, align 4
   %y = alloca i8, align 4
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   br label %l2
@@ -645,17 +645,17 @@ entry:
 l2:                                               ; preds = %l2, %entry
 ; CHECK: l2:
 ; CHECK-NEXT: Alive: <x>
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x y>
 
   call void @capture8(ptr %y)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x>
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   call void @capture8(ptr %x)
@@ -677,24 +677,24 @@ entry:
 if.then:                                          ; preds = %entry
 ; CHECK: if.then:
 ; CHECK-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %a)
-; CHECK: call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %a)
+  call void @llvm.lifetime.start.p0(ptr nonnull %a)
+; CHECK: call void @llvm.lifetime.start.p0(ptr nonnull %a)
 ; CHECK-NEXT: Alive: <a>
   tail call void @capture8(ptr %a)
-  call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %a)
-; CHECK: call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %a)
+  call void @llvm.lifetime.end.p0(ptr nonnull %a)
+; CHECK: call void @llvm.lifetime.end.p0(ptr nonnull %a)
 ; CHECK-NEXT: Alive: <>
   br label %if.end
 
 if.else:                                          ; preds = %entry
 ; CHECK: if.else:
 ; CHECK-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %b)
-; CHECK: call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %b)
+  call void @llvm.lifetime.start.p0(ptr nonnull %b)
+; CHECK: call void @llvm.lifetime.start.p0(ptr nonnull %b)
 ; CHECK-NEXT: Alive: <b>
   tail call void @capture8(ptr %b)
-  call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %b)
-; CHECK: call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %b)
+  call void @llvm.lifetime.end.p0(ptr nonnull %b)
+; CHECK: call void @llvm.lifetime.end.p0(ptr nonnull %b)
 ; CHECK-NEXT: Alive: <>
   br label %if.end
 
@@ -719,8 +719,8 @@ entry:
 if.then:
 ; CHECK: if.then:
 ; CHECK-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y>
 
   br label %if.end
@@ -730,12 +730,12 @@ if.then:
 if.else:
 ; CHECK: if.else:
 ; CHECK-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y>
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x y>
 
   br label %if.end
@@ -758,12 +758,12 @@ entry:
   %x = alloca i8, align 4
   %y = alloca i8, align 4
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y>
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x y>
 
   br label %end
@@ -773,7 +773,7 @@ entry:
 dead:
 ; CHECK: dead:
 ; CHECK-NOT: Alive:
-  call void @llvm.lifetime.start.p0(i64 4, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
 
   br label %end
 ; CHECK: br label %end
@@ -792,20 +792,20 @@ entry:
 ; CHECK: entry:
 ; CHECK-NEXT: Alive: <>
   %x = alloca i8
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
-  call void @llvm.lifetime.end.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
-  call void @llvm.lifetime.end.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
   ret void
@@ -827,8 +827,8 @@ if.then:
 ; CHECK: if.then:
 ; MAY-NEXT: Alive: <x y>
 ; MUST-NEXT: Alive: <>
-  call void @llvm.lifetime.end.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; MAY-NEXT: Alive: <x>
 ; MUST-NEXT: Alive: <>
 
@@ -840,12 +840,12 @@ if.then:
 if.else:
 ; CHECK: if.else:
 ; CHECK-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <y>
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x y>
 
   br label %if.then
@@ -868,8 +868,8 @@ entry:
   %x = alloca i8, align 4
   %y = alloca i8, align 4
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   br i1 %a, label %if.then, label %if.else
@@ -880,8 +880,8 @@ if.then:
 ; CHECK: if.then:
 ; MAY-NEXT: Alive: <x>
 ; MUST-NEXT: Alive: <>
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; MAY-NEXT: Alive: <x y>
 ; MUST-NEXT: Alive: <y>
 
@@ -893,12 +893,12 @@ if.then:
 if.else:
 ; CHECK: if.else:
 ; CHECK-NEXT: Alive: <x>
-  call void @llvm.lifetime.end.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x>
 
-  call void @llvm.lifetime.end.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.end.p0(ptr %x)
 ; CHECK-NEXT: Alive: <>
 
   br label %if.then
@@ -921,8 +921,8 @@ entry:
   %x = alloca i8, align 4
   %y = alloca i8, align 4
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %x)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %x)
 ; CHECK-NEXT: Alive: <x>
 
   br i1 %a, label %if.then, label %if.end
@@ -933,8 +933,8 @@ if.then:
 ; CHECK: if.then:
 ; MAY-NEXT: Alive: <x y>
 ; MUST-NEXT: Alive: <x>
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(ptr %y)
 ; CHECK-NEXT: Alive: <x y>
 
   br i1 %a, label %if.then, label %if.end
@@ -949,8 +949,8 @@ if.end:
   ret void
 }
 
-declare void @llvm.lifetime.start.p0(i64, ptr captures(none))
-declare void @llvm.lifetime.end.p0(i64, ptr captures(none))
+declare void @llvm.lifetime.start.p0(ptr captures(none))
+declare void @llvm.lifetime.end.p0(ptr captures(none))
 declare void @capture8(ptr)
 declare void @capture32(ptr)
 declare void @capture64(ptr)
diff --git a/llvm/test/Analysis/StackSafetyAnalysis/local.ll b/llvm/test/Analysis/StackSafetyAnalysis/local.ll
index 02d46c8..6944f38 100644
--- a/llvm/test/Analysis/StackSafetyAnalysis/local.ll
+++ b/llvm/test/Analysis/StackSafetyAnalysis/local.ll
@@ -707,9 +707,9 @@ entry:
   %n = load i8, ptr %y
   call void @llvm.memset.p0.i32(ptr nonnull %z, i8 0, i32 1, i1 false)
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %z)
+  call void @llvm.lifetime.start.p0(ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %z)
 
   ret void
 }
@@ -731,9 +731,9 @@ entry:
   %y = alloca i8, align 4
   %z = alloca i8, align 4
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %z)
+  call void @llvm.lifetime.start.p0(ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %z)
 
   store i8 5, ptr %x
   %n = load i8, ptr %y
@@ -756,13 +756,13 @@ entry:
   %y = alloca i8, align 4
   %z = alloca i8, align 4
 
-  call void @llvm.lifetime.start.p0(i64 1, ptr %x)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
-  call void @llvm.lifetime.start.p0(i64 1, ptr %z)
+  call void @llvm.lifetime.start.p0(ptr %x)
+  call void @llvm.lifetime.start.p0(ptr %y)
+  call void @llvm.lifetime.start.p0(ptr %z)
 
-  call void @llvm.lifetime.end.p0(i64 1, ptr %x)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %y)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %z)
+  call void @llvm.lifetime.end.p0(ptr %x)
+  call void @llvm.lifetime.end.p0(ptr %y)
+  call void @llvm.lifetime.end.p0(ptr %z)
 
   store i8 5, ptr %x
   %n = load i8, ptr %y
@@ -973,13 +973,13 @@ define void @DoubleLifetime() {
 ; CHECK-EMPTY:
 entry:
   %a = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
-  call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 true)
 
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
   call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false)
-  call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   ret void
 }
 
@@ -993,13 +993,13 @@ define void @DoubleLifetime2() {
 ; CHECK-EMPTY:
 entry:
   %a = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
-  call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   %n = load i32, ptr %a
 
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
   call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false)
-  call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   ret void
 }
 
@@ -1013,13 +1013,13 @@ define void @DoubleLifetime3() {
 ; CHECK-EMPTY:
 entry:
   %a = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
-  call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   store i32 5, ptr %a
 
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
   call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false)
-  call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   ret void
 }
 
@@ -1033,9 +1033,9 @@ define void @DoubleLifetime4() {
 ; CHECK-EMPTY:
 entry:
   %a = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.start.p0(ptr %a)
   call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false)
-  call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+  call void @llvm.lifetime.end.p0(ptr %a)
   call void @unknown_call(ptr %a)
   ret void
 }
@@ -1136,5 +1136,5 @@ entry:
   ret ptr null
 }
 
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.start.p0(ptr nocapture)
+declare void @llvm.lifetime.end.p0(ptr nocapture)
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
index 029d87b..c61113b 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn-- -passes='print<machine-uniformity>' -filetype=null %s 2>&1 | FileCheck %s
 
 # CHECK-LABEL: MachineUniformityInfo for function:  @basic
-# CHECK-NEXT: CYCLES ASSSUMED DIVERGENT:
+# CHECK-NEXT: CYCLES ASSUMED DIVERGENT:
 # CHECK-NEXT: depth=1: entries(bb.1 bb.3) bb.2
 # CHECK-LABEL: BLOCK bb.1
 # CHECK: DIVERGENT
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
index 524ea4e..dfa05f4 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir
@@ -1,7 +1,7 @@
 # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
 # RUN: llc -mtriple=amdgcn-- -passes='print<machine-uniformity>' -filetype=null %s 2>&1 | FileCheck %s
 # CHECK-LABEL: MachineUniformityInfo for function:  @divergent_cycle_1
-# CHECK-NEXT: CYCLES ASSSUMED DIVERGENT:
+# CHECK-NEXT: CYCLES ASSUMED DIVERGENT:
 # CHECK-NEXT: depth=1: entries(bb.3 bb.1) bb.4 bb.2
 # CHECK-NEXT: CYCLES WITH DIVERGENT EXIT:
 # CHECK-NEXT: depth=2: entries(bb.4 bb.1) bb.2
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
index 63e0e78..8627db0 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir
@@ -1,7 +1,7 @@
 # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
 # RUN: llc -mtriple=amdgcn-- -passes='print<machine-uniformity>' -filetype=null %s 2>&1 | FileCheck %s
 # CHECK-LABEL: MachineUniformityInfo for function:  @basic
-# CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+# CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 # CHECK: CYCLES WITH DIVERGENT EXIT:
 # CHECK: depth=1: entries(bb.1 bb.3) bb.2
 
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
index 7050f6d..575092d 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir
@@ -7,7 +7,7 @@
 #              |
 #             bb3
 # CHECK-LABEL: MachineUniformityInfo for function:  @cycle_diverge_enter
-# CHECK-NEXT: CYCLES ASSSUMED DIVERGENT:
+# CHECK-NEXT: CYCLES ASSUMED DIVERGENT:
 # CHECK-NEXT: depth=1: entries(bb.2 bb.1)
 # CHECK-NEXT: CYCLES WITH DIVERGENT EXIT:
 # CHECK-NEXT: depth=1: entries(bb.2 bb.1)
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index 099fd2f..d5c6000 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -854,6 +854,15 @@ define amdgpu_cs_chain void @dead(ptr addrspace(1) %out) {
   ret void
 }
 
+; CHECK: DIVERGENT:   %v = call i32 (ptr, ...) @llvm.amdgcn.call.whole.wave.i32.p0(ptr @wwf, i32 15)
+define amdgpu_cs void @call_whole_wave(ptr addrspace(1) %out) {
+  %v = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave.i32.p0(ptr @wwf, i32 15)
+  store i32 %v, ptr addrspace(1) %out
+  ret void
+}
+
+declare amdgpu_gfx_whole_wave i32 @wwf(i1, i32) #0
+
 declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
 declare i32 @llvm.amdgcn.permlane16.i32(i32, i32, i32, i32, i1, i1) #1
 declare i32 @llvm.amdgcn.permlanex16.i32(i32, i32, i32, i32, i1, i1) #1
@@ -931,6 +940,7 @@ declare <8 x half> @llvm.amdgcn.ds.load.tr16.b128.v8f16(ptr addrspace(3))
 declare <8 x bfloat> @llvm.amdgcn.ds.load.tr16.b128.v8bf16(ptr addrspace(3))
 
 declare i32 @llvm.amdgcn.dead.i32()
+declare i32 @llvm.amdgcn.call.whole.wave.i32.p0(ptr, ...) #0
 
 attributes #0 = { nounwind convergent }
 attributes #1 = { nounwind readnone convergent }
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll
index 7fd8ac4..92e98e28 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll
@@ -1,7 +1,7 @@
 ; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
 
 ; CHECK=LABEL: UniformityInfo for function 'basic':
-; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK: CYCLES ASSUMED DIVERGENT:
 ; CHECK:   depth=1: entries(P T) Q
 define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) {
 entry:
@@ -38,7 +38,7 @@ exit:
 }
 
 ; CHECK=LABEL: UniformityInfo for function 'nested':
-; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK: CYCLES ASSUMED DIVERGENT:
 ; CHECK:  depth=1: entries(P T) Q A C B
 define amdgpu_kernel void @nested(i32 %a, i32 %b, i32 %c) {
 entry:
@@ -88,7 +88,7 @@ exit:
 ;            depth=3: entries(B) D
 ;
 ; CHECK-LABEL: UniformityInfo for function 'irreducible_outer_cycle':
-; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK: CYCLES ASSUMED DIVERGENT:
 ; CHECK:  depth=1: entries(Q R) C B D
 define void @irreducible_outer_cycle(i1 %c1, i1 %c2, i1 %c3) {
 entry:
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
index fb83b19..033ecac 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll
@@ -2,7 +2,7 @@
 
 define amdgpu_kernel void @divergent_cycle_1(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: UniformityInfo for function 'divergent_cycle_1':
-; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK: CYCLES ASSUMED DIVERGENT:
 ; CHECK:   depth=1: entries(R P) S Q
 ; CHECK: CYCLES WITH DIVERGENT EXIT:
 ; CHECK:   depth=2: entries(S P) Q
@@ -40,7 +40,7 @@ exit:
 
 define amdgpu_kernel void @uniform_cycle_1(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: UniformityInfo for function 'uniform_cycle_1':
-; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
 entry:
  %cond.uni = icmp slt i32 %a, 0
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
index 8dd44eb..35a165d 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll
@@ -10,7 +10,7 @@
 ;; both cycles are reported as converged.
 ;;
 ;; CHECK-LABEL: UniformityInfo for function 'headers_b_p':
-;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+;; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
 
 define amdgpu_kernel void @headers_b_p(i32 %a, i32 %b, i32 %c) {
@@ -68,7 +68,7 @@ exit:
 ;; both cycles are reported as converged.
 ;;
 ;; CHECK-LABEL: UniformityInfo for function 'headers_a_p':
-;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+;; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
 
 define amdgpu_kernel void @headers_a_p(i32 %a, i32 %b, i32 %c) {
@@ -126,7 +126,7 @@ exit:
 ;; only the inner cycle is reported as diverged.
 ;;
 ;; CHECK-LABEL: UniformityInfo for function 'headers_b_t':
-;; CHECK: CYCLES ASSSUMED DIVERGENT:
+;; CHECK: CYCLES ASSUMED DIVERGENT:
 ;; CHECK:   depth=2: entries(T P) S Q R
 ;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
 
@@ -184,7 +184,7 @@ exit:
 ;; Hence the outermost cycle is reported as diverged.
 ;;
 ;; CHECK-LABEL: UniformityInfo for function 'headers_a_t':
-;; CHECK: CYCLES ASSSUMED DIVERGENT:
+;; CHECK: CYCLES ASSUMED DIVERGENT:
 ;; CHECK:   depth=1: entries(A B) D T S Q P R C
 ;; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
 
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
index efad77b..6d1a95b 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll
@@ -10,7 +10,7 @@
 ;; the entire cycle is reported as converged.
 ;;
 ;; CHECK-LABEL: UniformityInfo for function 't_header':
-;; CHECK: CYCLES ASSSUMED DIVERGENT:
+;; CHECK: CYCLES ASSUMED DIVERGENT:
 ;; CHECK:   depth=1: entries(T P) S Q R
 
 define amdgpu_kernel void @t_header(i32 %a, i32 %b, i32 %c) {
@@ -59,7 +59,7 @@ exit:
 ;; the cycle is reported as converged.
 ;;
 ;; CHECK-LABEL: UniformityInfo for function 'p_header':
-;; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+;; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 
 define amdgpu_kernel void @p_header(i32 %a, i32 %b, i32 %c) {
 entry:
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll
index 2a3ff41..784f58e 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll
@@ -1,7 +1,7 @@
 ; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s
 
 ; CHECK=LABEL: UniformityInfo for function 'basic':
-; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ; CHECK: CYCLES WITH DIVERGENT EXIT:
 ; CHECK:   depth=1: entries(P T) Q
 define amdgpu_kernel void @basic(i32 %a, i32 %b, i32 %c) {
@@ -39,7 +39,7 @@ exit:
 }
 
 ; CHECK-LABEL: UniformityInfo for function 'outer_reducible':
-; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ; CHECK: CYCLES WITH DIVERGENT EXIT:
 ; CHECK:   depth=1: entries(H) P T R Q
 define amdgpu_kernel void @outer_reducible(i32 %a, i32 %b, i32 %c) {
@@ -96,7 +96,7 @@ exit:
 ; unless the def itself is divergent.
 ;
 ; CHECK-LABEL: UniformityInfo for function 'no_divergent_exit':
-; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK: CYCLES ASSUMED DIVERGENT:
 ; CHECK:   depth=1: entries(H B) C
 ; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
 define amdgpu_kernel void @no_divergent_exit(i32 %n, i32 %a, i32 %b) #0 {
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll
index 6b8e7a1..91b4446 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll
@@ -571,7 +571,7 @@ X:
 
 define amdgpu_kernel void @always_uniform() {
 ; CHECK-LABEL: UniformityInfo for function 'always_uniform':
-; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK: CYCLES ASSUMED DIVERGENT:
 ; CHECK:   depth=1: entries(bb2 bb3)
 
 bb:
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll
index feb2949..503c6f8 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll
@@ -32,7 +32,7 @@
 
 define amdgpu_kernel void @nested_irreducible(i32 %a, i32 %b, i32 %c) {
 ; CHECK=LABEL: UniformityInfo for function 'nested_irreducible':
-; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ; CHECK: CYCLES WITH DIVERGENT EXIT:
 ; CHECK-DAG:   depth=2: entries(P T) R Q
 ; CHECK-DAG:   depth=1: entries(H) S P T R Q U
@@ -119,7 +119,7 @@ exit:
 
 define amdgpu_kernel void @header_label_1(i32 %a, i32 %b, i32 %c) {
 ; CHECK=LABEL: UniformityInfo for function 'header_label_1':
-; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ; CHECK: CYCLES WITH DIVERGENT EXIT:
 ; CHECK:  depth=1: entries(H) Q P U T R
 entry:
@@ -187,7 +187,7 @@ exit:
 
 define amdgpu_kernel void @header_label_2(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: UniformityInfo for function 'header_label_2':
-; CHECK-NOT: CYCLES ASSSUMED DIVERGENT:
+; CHECK-NOT: CYCLES ASSUMED DIVERGENT:
 ; CHECK-NOT: CYCLES WITH DIVERGENT EXIT:
 entry:
   %cond.uni = icmp slt i32 %a, 0
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
index 7466c23..f5668ce 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
@@ -113,11 +113,40 @@ define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimz() !reqd_work_gr
   ret void
 }
 
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_uniform_len_1'
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_z_uniform_len_1(ptr %o) !reqd_work_group_size !4 {
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store i32 %id.z, ptr %o
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_div_wavefront_size'
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_x_div_wavefront_size(ptr %o) #3 !reqd_work_group_size !5 {
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %id.sg = lshr i32 %id.x, 6
+  store i32 %id.sg, ptr %o
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_uniform_in_subgroup'
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_y_uniform_in_subgroup(ptr %o) #3 !reqd_work_group_size !5 {
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  store i32 %id.y, ptr %o
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 attributes #2 = { "amdgpu-flat-work-group-size"="1,1" }
+attributes #3 = { "target-cpu"="gfx900" "amdgpu-flat-work-group-size"="256,256" }
 
 !0 = !{i32 1, i32 1, i32 1}
 !1 = !{i32 2, i32 1, i32 1}
 !2 = !{i32 1, i32 2, i32 1}
 !3 = !{i32 1, i32 1, i32 2}
+!4 = !{i32 64, i32 1, i32 1}
+!5 = !{i32 128, i32 2, i32 1}
diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/non-header-join.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/non-header-join.ll
index 5fbf435..3525313 100644
--- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/non-header-join.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/non-header-join.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
-; CHECK: CYCLES ASSSUMED DIVERGENT:
+; CHECK: CYCLES ASSUMED DIVERGENT:
 ; CHECK-NEXT: depth=1: entries(if.end16 for.cond1) for.body4
 
 define void @foo(i1 %b) {
diff --git a/llvm/test/Analysis/ValueTracking/pr152700.ll b/llvm/test/Analysis/ValueTracking/pr152700.ll
new file mode 100644
index 0000000..91644c5
--- /dev/null
+++ b/llvm/test/Analysis/ValueTracking/pr152700.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare noundef i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+declare i32 @llvm.umin.i32(i32, i32)
+define i32 @foo(i1 %c, i32 %arg) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i1 [[C:%.*]], i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[I:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+; CHECK-NEXT:    br i1 [[C]], label %[[BB_1:.*]], label %[[BB_2:.*]]
+; CHECK:       [[BB_1]]:
+; CHECK-NEXT:    br label %[[BB_2]]
+; CHECK:       [[BB_2]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[I]], %[[ENTRY]] ], [ 0, %[[BB_1]] ]
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.umin.i32(i32 [[PHI]], i32 [[ARG]])
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %i = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+  br i1 %c, label %bb.1, label %bb.2
+bb.1:
+  br label %bb.2
+bb.2:
+  %phi = phi i32 [ %i, %entry ], [ 0, %bb.1 ]
+  %res = call i32 @llvm.umin.i32(i32 %phi, i32 %arg)
+  ret i32 %res
+}